diff --git a/adapter_model.bin b/adapter_model.bin index 881c929ef6feee8884c6c40245ae107a561cf26b..5de3f3e6c9e61b9bd3e08f609bbd9ec5373788b9 100644 --- a/adapter_model.bin +++ b/adapter_model.bin @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:834da4210cbe02d35ef0db3b3658d8c7275dda5e4c2e20ff5a4d8f2c935cd877 +oid sha256:beeedc987a15ea2421995f369107f9b533b5d424088562667fdc59e9d36e27f2 size 113314765 diff --git a/checkpoint-600/README.md b/checkpoint-600/README.md new file mode 100644 index 0000000000000000000000000000000000000000..b32efe7366f05d1d90816d2ad9e4b06ccca46bea --- /dev/null +++ b/checkpoint-600/README.md @@ -0,0 +1,219 @@ +--- +library_name: peft +base_model: mistralai/Mistral-7B-v0.1 +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] + + +## Training procedure + + +The following `bitsandbytes` quantization config was used during training: +- quant_method: bitsandbytes +- load_in_8bit: False +- load_in_4bit: True +- llm_int8_threshold: 6.0 +- llm_int8_skip_modules: None +- llm_int8_enable_fp32_cpu_offload: False +- llm_int8_has_fp16_weight: False +- bnb_4bit_quant_type: nf4 +- bnb_4bit_use_double_quant: True +- bnb_4bit_compute_dtype: float16 + +### Framework versions + + +- PEFT 0.6.0.dev0 diff --git a/checkpoint-600/adapter_config.json b/checkpoint-600/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..4e108f2da037ef6250457c67a4bedd308d97303c --- /dev/null +++ b/checkpoint-600/adapter_config.json @@ -0,0 +1,24 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "mistralai/Mistral-7B-v0.1", + "bias": "none", + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layers_pattern": null, + "layers_to_transform": null, + "lora_alpha": 16, + "lora_dropout": 0.05, + "modules_to_save": null, + "peft_type": "LORA", + "r": 16, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "gate_proj", + "down_proj", + "up_proj" + ], + "task_type": "CAUSAL_LM" +} \ No newline at end of file diff --git a/checkpoint-600/adapter_model.bin b/checkpoint-600/adapter_model.bin new file mode 100644 index 0000000000000000000000000000000000000000..1c98e4dc10d4397572ad2233bf1f2ae5d0a10fcb --- /dev/null +++ b/checkpoint-600/adapter_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e89ebdd6587ad945d5187588bd36ac4fd4361c7ed1a2e67c15bf52aafd5e3b98 +size 113314765 diff --git a/checkpoint-600/optimizer.pt b/checkpoint-600/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..1ae7df044a47da4d59810d802b04d0388f367571 --- /dev/null +++ b/checkpoint-600/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e961eee9681942c9b315a838e8c3305c3384b8c6dbacb3b820d1779f6e573c23 +size 226653957 diff --git a/checkpoint-600/rng_state.pth b/checkpoint-600/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..e0eac287fe6aaa4f5ead2a49c7c4f92cbe6cc9e4 --- /dev/null +++ b/checkpoint-600/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c3bd16cf2784e27d887ba26f43636be027528577198ffa7914b576e44c1fd6ad +size 14575 diff --git a/checkpoint-600/scheduler.pt b/checkpoint-600/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..b8b78778de95ad4ef2d142354e507dc82c66b32c --- /dev/null +++ b/checkpoint-600/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:55f7cf85b820e7f67d841ddbeab7ac10af6e481bc637d729f2badec258f8130b +size 627 diff --git a/checkpoint-600/trainer_state.json b/checkpoint-600/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..b0c37d1b93b481e094b45de6ce15c0e2f4f92d7c --- /dev/null +++ b/checkpoint-600/trainer_state.json @@ -0,0 +1,3619 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 1.1559754483090625, + "eval_steps": 500, + "global_step": 600, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0, + "learning_rate": 4.000000000000001e-06, + "loss": 0.6869, + "step": 1 + }, + { + "epoch": 0.0, + "learning_rate": 8.000000000000001e-06, + "loss": 0.8396, + "step": 2 + }, + { + "epoch": 0.01, + "learning_rate": 1.2e-05, + "loss": 0.7489, + "step": 3 + }, + { + "epoch": 0.01, + "learning_rate": 1.6000000000000003e-05, + "loss": 0.7252, + "step": 4 + }, + { + "epoch": 0.01, + "learning_rate": 2e-05, + "loss": 0.6548, + "step": 5 + }, + { + "epoch": 0.01, + "learning_rate": 2.4e-05, + "loss": 0.8022, + "step": 6 + }, + { + "epoch": 0.01, + "learning_rate": 2.8000000000000003e-05, + "loss": 0.6524, + "step": 7 + }, + { + "epoch": 0.02, + "learning_rate": 3.2000000000000005e-05, + "loss": 0.6981, + "step": 8 + }, + { + "epoch": 0.02, + "learning_rate": 3.6e-05, + "loss": 0.7488, + "step": 9 + }, + { + "epoch": 0.02, + "learning_rate": 4e-05, + "loss": 0.6368, + "step": 10 + }, + { + "epoch": 0.02, + "learning_rate": 4.4000000000000006e-05, + "loss": 0.6891, + "step": 11 + }, + { + "epoch": 0.02, + "learning_rate": 4.8e-05, + "loss": 0.7968, + "step": 12 + }, + { + "epoch": 0.03, + "learning_rate": 5.2000000000000004e-05, + "loss": 0.6912, + "step": 13 + }, + { + "epoch": 0.03, + "learning_rate": 5.6000000000000006e-05, + "loss": 0.8452, + "step": 14 + }, + { + "epoch": 0.03, + "learning_rate": 6e-05, + "loss": 0.6989, + "step": 15 + }, + { + "epoch": 0.03, + "learning_rate": 6.400000000000001e-05, + "loss": 0.6685, + "step": 16 + }, + { + "epoch": 0.03, + "learning_rate": 6.800000000000001e-05, + "loss": 0.5469, + "step": 17 + }, + { + "epoch": 0.03, + "learning_rate": 7.2e-05, + "loss": 0.7915, + "step": 18 + }, + { + "epoch": 0.04, + "learning_rate": 7.6e-05, + "loss": 0.7744, + "step": 19 + }, + { + "epoch": 0.04, + "learning_rate": 8e-05, + "loss": 0.6804, + "step": 20 + }, + { + "epoch": 0.04, + "learning_rate": 8.4e-05, + "loss": 0.7796, + "step": 21 + }, + { + "epoch": 0.04, + "learning_rate": 8.800000000000001e-05, + "loss": 0.706, + "step": 22 + }, + { + "epoch": 0.04, + "learning_rate": 9.200000000000001e-05, + "loss": 0.6798, + "step": 23 + }, + { + "epoch": 0.05, + "learning_rate": 9.6e-05, + "loss": 0.6333, + "step": 24 + }, + { + "epoch": 0.05, + "learning_rate": 0.0001, + "loss": 0.6012, + "step": 25 + }, + { + "epoch": 0.05, + "learning_rate": 0.00010400000000000001, + "loss": 0.52, + "step": 26 + }, + { + "epoch": 0.05, + "learning_rate": 0.00010800000000000001, + "loss": 0.6583, + "step": 27 + }, + { + "epoch": 0.05, + "learning_rate": 0.00011200000000000001, + "loss": 0.7354, + "step": 28 + }, + { + "epoch": 0.06, + "learning_rate": 0.000116, + "loss": 0.6296, + "step": 29 + }, + { + "epoch": 0.06, + "learning_rate": 0.00012, + "loss": 0.6352, + "step": 30 + }, + { + "epoch": 0.06, + "learning_rate": 0.000124, + "loss": 0.6007, + "step": 31 + }, + { + "epoch": 0.06, + "learning_rate": 0.00012800000000000002, + "loss": 0.5659, + "step": 32 + }, + { + "epoch": 0.06, + "learning_rate": 0.000132, + "loss": 0.5138, + "step": 33 + }, + { + "epoch": 0.07, + "learning_rate": 0.00013600000000000003, + "loss": 0.6639, + "step": 34 + }, + { + "epoch": 0.07, + "learning_rate": 0.00014, + "loss": 0.5934, + "step": 35 + }, + { + "epoch": 0.07, + "learning_rate": 0.000144, + "loss": 0.5233, + "step": 36 + }, + { + "epoch": 0.07, + "learning_rate": 0.000148, + "loss": 0.5307, + "step": 37 + }, + { + "epoch": 0.07, + "learning_rate": 0.000152, + "loss": 0.5928, + "step": 38 + }, + { + "epoch": 0.08, + "learning_rate": 0.00015600000000000002, + "loss": 0.5908, + "step": 39 + }, + { + "epoch": 0.08, + "learning_rate": 0.00016, + "loss": 0.6366, + "step": 40 + }, + { + "epoch": 0.08, + "learning_rate": 0.000164, + "loss": 0.5972, + "step": 41 + }, + { + "epoch": 0.08, + "learning_rate": 0.000168, + "loss": 0.4825, + "step": 42 + }, + { + "epoch": 0.08, + "learning_rate": 0.000172, + "loss": 0.6783, + "step": 43 + }, + { + "epoch": 0.08, + "learning_rate": 0.00017600000000000002, + "loss": 0.6082, + "step": 44 + }, + { + "epoch": 0.09, + "learning_rate": 0.00018, + "loss": 0.7633, + "step": 45 + }, + { + "epoch": 0.09, + "learning_rate": 0.00018400000000000003, + "loss": 0.5988, + "step": 46 + }, + { + "epoch": 0.09, + "learning_rate": 0.000188, + "loss": 0.6658, + "step": 47 + }, + { + "epoch": 0.09, + "learning_rate": 0.000192, + "loss": 0.5945, + "step": 48 + }, + { + "epoch": 0.09, + "learning_rate": 0.000196, + "loss": 0.5984, + "step": 49 + }, + { + "epoch": 0.1, + "learning_rate": 0.0002, + "loss": 0.6778, + "step": 50 + }, + { + "epoch": 0.1, + "learning_rate": 0.00020400000000000003, + "loss": 0.6057, + "step": 51 + }, + { + "epoch": 0.1, + "learning_rate": 0.00020800000000000001, + "loss": 0.601, + "step": 52 + }, + { + "epoch": 0.1, + "learning_rate": 0.00021200000000000003, + "loss": 0.5566, + "step": 53 + }, + { + "epoch": 0.1, + "learning_rate": 0.00021600000000000002, + "loss": 0.5911, + "step": 54 + }, + { + "epoch": 0.11, + "learning_rate": 0.00022000000000000003, + "loss": 0.7636, + "step": 55 + }, + { + "epoch": 0.11, + "learning_rate": 0.00022400000000000002, + "loss": 0.5537, + "step": 56 + }, + { + "epoch": 0.11, + "learning_rate": 0.00022799999999999999, + "loss": 0.6037, + "step": 57 + }, + { + "epoch": 0.11, + "learning_rate": 0.000232, + "loss": 0.6474, + "step": 58 + }, + { + "epoch": 0.11, + "learning_rate": 0.000236, + "loss": 0.6483, + "step": 59 + }, + { + "epoch": 0.12, + "learning_rate": 0.00024, + "loss": 0.5021, + "step": 60 + }, + { + "epoch": 0.12, + "learning_rate": 0.000244, + "loss": 0.5347, + "step": 61 + }, + { + "epoch": 0.12, + "learning_rate": 0.000248, + "loss": 0.5791, + "step": 62 + }, + { + "epoch": 0.12, + "learning_rate": 0.000252, + "loss": 0.5407, + "step": 63 + }, + { + "epoch": 0.12, + "learning_rate": 0.00025600000000000004, + "loss": 0.5298, + "step": 64 + }, + { + "epoch": 0.13, + "learning_rate": 0.00026000000000000003, + "loss": 0.5685, + "step": 65 + }, + { + "epoch": 0.13, + "learning_rate": 0.000264, + "loss": 0.5108, + "step": 66 + }, + { + "epoch": 0.13, + "learning_rate": 0.000268, + "loss": 0.526, + "step": 67 + }, + { + "epoch": 0.13, + "learning_rate": 0.00027200000000000005, + "loss": 0.6843, + "step": 68 + }, + { + "epoch": 0.13, + "learning_rate": 0.000276, + "loss": 0.6608, + "step": 69 + }, + { + "epoch": 0.13, + "learning_rate": 0.00028, + "loss": 0.5866, + "step": 70 + }, + { + "epoch": 0.14, + "learning_rate": 0.000284, + "loss": 0.6422, + "step": 71 + }, + { + "epoch": 0.14, + "learning_rate": 0.000288, + "loss": 0.449, + "step": 72 + }, + { + "epoch": 0.14, + "learning_rate": 0.000292, + "loss": 0.5319, + "step": 73 + }, + { + "epoch": 0.14, + "learning_rate": 0.000296, + "loss": 0.5977, + "step": 74 + }, + { + "epoch": 0.14, + "learning_rate": 0.00030000000000000003, + "loss": 0.5805, + "step": 75 + }, + { + "epoch": 0.15, + "learning_rate": 0.000304, + "loss": 0.5209, + "step": 76 + }, + { + "epoch": 0.15, + "learning_rate": 0.000308, + "loss": 0.6098, + "step": 77 + }, + { + "epoch": 0.15, + "learning_rate": 0.00031200000000000005, + "loss": 0.4665, + "step": 78 + }, + { + "epoch": 0.15, + "learning_rate": 0.00031600000000000004, + "loss": 0.6882, + "step": 79 + }, + { + "epoch": 0.15, + "learning_rate": 0.00032, + "loss": 0.5427, + "step": 80 + }, + { + "epoch": 0.16, + "learning_rate": 0.000324, + "loss": 0.5345, + "step": 81 + }, + { + "epoch": 0.16, + "learning_rate": 0.000328, + "loss": 0.663, + "step": 82 + }, + { + "epoch": 0.16, + "learning_rate": 0.000332, + "loss": 0.5393, + "step": 83 + }, + { + "epoch": 0.16, + "learning_rate": 0.000336, + "loss": 0.5711, + "step": 84 + }, + { + "epoch": 0.16, + "learning_rate": 0.00034, + "loss": 0.5261, + "step": 85 + }, + { + "epoch": 0.17, + "learning_rate": 0.000344, + "loss": 0.5775, + "step": 86 + }, + { + "epoch": 0.17, + "learning_rate": 0.000348, + "loss": 0.6329, + "step": 87 + }, + { + "epoch": 0.17, + "learning_rate": 0.00035200000000000005, + "loss": 0.4425, + "step": 88 + }, + { + "epoch": 0.17, + "learning_rate": 0.00035600000000000003, + "loss": 0.6837, + "step": 89 + }, + { + "epoch": 0.17, + "learning_rate": 0.00036, + "loss": 0.615, + "step": 90 + }, + { + "epoch": 0.18, + "learning_rate": 0.000364, + "loss": 0.5615, + "step": 91 + }, + { + "epoch": 0.18, + "learning_rate": 0.00036800000000000005, + "loss": 0.5434, + "step": 92 + }, + { + "epoch": 0.18, + "learning_rate": 0.00037200000000000004, + "loss": 0.5864, + "step": 93 + }, + { + "epoch": 0.18, + "learning_rate": 0.000376, + "loss": 0.5583, + "step": 94 + }, + { + "epoch": 0.18, + "learning_rate": 0.00038, + "loss": 0.5299, + "step": 95 + }, + { + "epoch": 0.18, + "learning_rate": 0.000384, + "loss": 0.532, + "step": 96 + }, + { + "epoch": 0.19, + "learning_rate": 0.000388, + "loss": 0.5227, + "step": 97 + }, + { + "epoch": 0.19, + "learning_rate": 0.000392, + "loss": 0.5275, + "step": 98 + }, + { + "epoch": 0.19, + "learning_rate": 0.00039600000000000003, + "loss": 0.4541, + "step": 99 + }, + { + "epoch": 0.19, + "learning_rate": 0.0004, + "loss": 0.6485, + "step": 100 + }, + { + "epoch": 0.19, + "learning_rate": 0.0003999995350775973, + "loss": 0.5438, + "step": 101 + }, + { + "epoch": 0.2, + "learning_rate": 0.00039999814031255063, + "loss": 0.5997, + "step": 102 + }, + { + "epoch": 0.2, + "learning_rate": 0.00039999581571134455, + "loss": 0.5322, + "step": 103 + }, + { + "epoch": 0.2, + "learning_rate": 0.0003999925612847867, + "loss": 0.484, + "step": 104 + }, + { + "epoch": 0.2, + "learning_rate": 0.00039998837704800766, + "loss": 0.5961, + "step": 105 + }, + { + "epoch": 0.2, + "learning_rate": 0.00039998326302046085, + "loss": 0.7405, + "step": 106 + }, + { + "epoch": 0.21, + "learning_rate": 0.00039997721922592255, + "loss": 0.5802, + "step": 107 + }, + { + "epoch": 0.21, + "learning_rate": 0.00039997024569249167, + "loss": 0.769, + "step": 108 + }, + { + "epoch": 0.21, + "learning_rate": 0.0003999623424525898, + "loss": 0.5598, + "step": 109 + }, + { + "epoch": 0.21, + "learning_rate": 0.0003999535095429608, + "loss": 0.6143, + "step": 110 + }, + { + "epoch": 0.21, + "learning_rate": 0.00039994374700467095, + "loss": 0.5766, + "step": 111 + }, + { + "epoch": 0.22, + "learning_rate": 0.00039993305488310836, + "loss": 0.7695, + "step": 112 + }, + { + "epoch": 0.22, + "learning_rate": 0.0003999214332279831, + "loss": 0.7153, + "step": 113 + }, + { + "epoch": 0.22, + "learning_rate": 0.0003999088820933269, + "loss": 0.5835, + "step": 114 + }, + { + "epoch": 0.22, + "learning_rate": 0.00039989540153749286, + "loss": 0.6634, + "step": 115 + }, + { + "epoch": 0.22, + "learning_rate": 0.000399880991623155, + "loss": 0.6069, + "step": 116 + }, + { + "epoch": 0.23, + "learning_rate": 0.0003998656524173082, + "loss": 0.7224, + "step": 117 + }, + { + "epoch": 0.23, + "learning_rate": 0.000399849383991268, + "loss": 0.5884, + "step": 118 + }, + { + "epoch": 0.23, + "learning_rate": 0.0003998321864206699, + "loss": 0.5122, + "step": 119 + }, + { + "epoch": 0.23, + "learning_rate": 0.00039981405978546924, + "loss": 0.6453, + "step": 120 + }, + { + "epoch": 0.23, + "learning_rate": 0.0003997950041699408, + "loss": 0.4665, + "step": 121 + }, + { + "epoch": 0.23, + "learning_rate": 0.0003997750196626785, + "loss": 0.5428, + "step": 122 + }, + { + "epoch": 0.24, + "learning_rate": 0.00039975410635659464, + "loss": 0.4365, + "step": 123 + }, + { + "epoch": 0.24, + "learning_rate": 0.00039973226434891995, + "loss": 0.5978, + "step": 124 + }, + { + "epoch": 0.24, + "learning_rate": 0.00039970949374120286, + "loss": 0.7729, + "step": 125 + }, + { + "epoch": 0.24, + "learning_rate": 0.000399685794639309, + "loss": 0.6212, + "step": 126 + }, + { + "epoch": 0.24, + "learning_rate": 0.00039966116715342066, + "loss": 0.5426, + "step": 127 + }, + { + "epoch": 0.25, + "learning_rate": 0.00039963561139803676, + "loss": 0.5782, + "step": 128 + }, + { + "epoch": 0.25, + "learning_rate": 0.0003996091274919716, + "loss": 0.6701, + "step": 129 + }, + { + "epoch": 0.25, + "learning_rate": 0.0003995817155583548, + "loss": 0.6314, + "step": 130 + }, + { + "epoch": 0.25, + "learning_rate": 0.0003995533757246307, + "loss": 0.6662, + "step": 131 + }, + { + "epoch": 0.25, + "learning_rate": 0.0003995241081225573, + "loss": 0.5192, + "step": 132 + }, + { + "epoch": 0.26, + "learning_rate": 0.0003994939128882065, + "loss": 0.5591, + "step": 133 + }, + { + "epoch": 0.26, + "learning_rate": 0.0003994627901619625, + "loss": 0.5809, + "step": 134 + }, + { + "epoch": 0.26, + "learning_rate": 0.0003994307400885219, + "loss": 0.4871, + "step": 135 + }, + { + "epoch": 0.26, + "learning_rate": 0.0003993977628168928, + "loss": 0.6666, + "step": 136 + }, + { + "epoch": 0.26, + "learning_rate": 0.0003993638585003938, + "loss": 0.6469, + "step": 137 + }, + { + "epoch": 0.27, + "learning_rate": 0.00039932902729665357, + "loss": 0.5727, + "step": 138 + }, + { + "epoch": 0.27, + "learning_rate": 0.00039929326936761036, + "loss": 0.6715, + "step": 139 + }, + { + "epoch": 0.27, + "learning_rate": 0.00039925658487951067, + "loss": 0.5686, + "step": 140 + }, + { + "epoch": 0.27, + "learning_rate": 0.00039921897400290894, + "loss": 0.501, + "step": 141 + }, + { + "epoch": 0.27, + "learning_rate": 0.00039918043691266665, + "loss": 0.5795, + "step": 142 + }, + { + "epoch": 0.28, + "learning_rate": 0.00039914097378795124, + "loss": 0.6287, + "step": 143 + }, + { + "epoch": 0.28, + "learning_rate": 0.00039910058481223564, + "loss": 0.7016, + "step": 144 + }, + { + "epoch": 0.28, + "learning_rate": 0.00039905927017329726, + "loss": 0.6232, + "step": 145 + }, + { + "epoch": 0.28, + "learning_rate": 0.00039901703006321715, + "loss": 0.5291, + "step": 146 + }, + { + "epoch": 0.28, + "learning_rate": 0.00039897386467837903, + "loss": 0.5297, + "step": 147 + }, + { + "epoch": 0.28, + "learning_rate": 0.00039892977421946844, + "loss": 0.5784, + "step": 148 + }, + { + "epoch": 0.29, + "learning_rate": 0.0003988847588914718, + "loss": 0.5714, + "step": 149 + }, + { + "epoch": 0.29, + "learning_rate": 0.0003988388189036754, + "loss": 0.5044, + "step": 150 + }, + { + "epoch": 0.29, + "learning_rate": 0.0003987919544696646, + "loss": 0.8246, + "step": 151 + }, + { + "epoch": 0.29, + "learning_rate": 0.0003987441658073226, + "loss": 0.5048, + "step": 152 + }, + { + "epoch": 0.29, + "learning_rate": 0.0003986954531388297, + "loss": 0.5433, + "step": 153 + }, + { + "epoch": 0.3, + "learning_rate": 0.00039864581669066186, + "loss": 0.5251, + "step": 154 + }, + { + "epoch": 0.3, + "learning_rate": 0.0003985952566935902, + "loss": 0.5708, + "step": 155 + }, + { + "epoch": 0.3, + "learning_rate": 0.00039854377338267936, + "loss": 0.6276, + "step": 156 + }, + { + "epoch": 0.3, + "learning_rate": 0.00039849136699728684, + "loss": 0.4915, + "step": 157 + }, + { + "epoch": 0.3, + "learning_rate": 0.0003984380377810617, + "loss": 0.6389, + "step": 158 + }, + { + "epoch": 0.31, + "learning_rate": 0.00039838378598194325, + "loss": 0.6067, + "step": 159 + }, + { + "epoch": 0.31, + "learning_rate": 0.00039832861185216045, + "loss": 0.6136, + "step": 160 + }, + { + "epoch": 0.31, + "learning_rate": 0.0003982725156482301, + "loss": 0.5597, + "step": 161 + }, + { + "epoch": 0.31, + "learning_rate": 0.000398215497630956, + "loss": 0.5957, + "step": 162 + }, + { + "epoch": 0.31, + "learning_rate": 0.0003981575580654278, + "loss": 0.5853, + "step": 163 + }, + { + "epoch": 0.32, + "learning_rate": 0.0003980986972210194, + "loss": 0.5462, + "step": 164 + }, + { + "epoch": 0.32, + "learning_rate": 0.0003980389153713881, + "loss": 0.5302, + "step": 165 + }, + { + "epoch": 0.32, + "learning_rate": 0.00039797821279447307, + "loss": 0.5395, + "step": 166 + }, + { + "epoch": 0.32, + "learning_rate": 0.00039791658977249425, + "loss": 0.7004, + "step": 167 + }, + { + "epoch": 0.32, + "learning_rate": 0.00039785404659195084, + "loss": 0.5622, + "step": 168 + }, + { + "epoch": 0.33, + "learning_rate": 0.00039779058354362013, + "loss": 0.5759, + "step": 169 + }, + { + "epoch": 0.33, + "learning_rate": 0.000397726200922556, + "loss": 0.6184, + "step": 170 + }, + { + "epoch": 0.33, + "learning_rate": 0.0003976608990280877, + "loss": 0.5488, + "step": 171 + }, + { + "epoch": 0.33, + "learning_rate": 0.0003975946781638183, + "loss": 0.6162, + "step": 172 + }, + { + "epoch": 0.33, + "learning_rate": 0.0003975275386376236, + "loss": 0.558, + "step": 173 + }, + { + "epoch": 0.34, + "learning_rate": 0.0003974594807616502, + "loss": 0.519, + "step": 174 + }, + { + "epoch": 0.34, + "learning_rate": 0.0003973905048523144, + "loss": 0.6195, + "step": 175 + }, + { + "epoch": 0.34, + "learning_rate": 0.00039732061123030064, + "loss": 0.5991, + "step": 176 + }, + { + "epoch": 0.34, + "learning_rate": 0.0003972498002205601, + "loss": 0.5428, + "step": 177 + }, + { + "epoch": 0.34, + "learning_rate": 0.00039717807215230896, + "loss": 0.5323, + "step": 178 + }, + { + "epoch": 0.34, + "learning_rate": 0.00039710542735902705, + "loss": 0.5307, + "step": 179 + }, + { + "epoch": 0.35, + "learning_rate": 0.0003970318661784564, + "loss": 0.5783, + "step": 180 + }, + { + "epoch": 0.35, + "learning_rate": 0.0003969573889525993, + "loss": 0.5924, + "step": 181 + }, + { + "epoch": 0.35, + "learning_rate": 0.00039688199602771714, + "loss": 0.5902, + "step": 182 + }, + { + "epoch": 0.35, + "learning_rate": 0.00039680568775432855, + "loss": 0.6291, + "step": 183 + }, + { + "epoch": 0.35, + "learning_rate": 0.0003967284644872077, + "loss": 0.5942, + "step": 184 + }, + { + "epoch": 0.36, + "learning_rate": 0.0003966503265853829, + "loss": 0.4878, + "step": 185 + }, + { + "epoch": 0.36, + "learning_rate": 0.0003965712744121347, + "loss": 0.6487, + "step": 186 + }, + { + "epoch": 0.36, + "learning_rate": 0.0003964913083349945, + "loss": 0.6111, + "step": 187 + }, + { + "epoch": 0.36, + "learning_rate": 0.00039641042872574233, + "loss": 0.6072, + "step": 188 + }, + { + "epoch": 0.36, + "learning_rate": 0.00039632863596040575, + "loss": 0.716, + "step": 189 + }, + { + "epoch": 0.37, + "learning_rate": 0.00039624593041925763, + "loss": 0.6178, + "step": 190 + }, + { + "epoch": 0.37, + "learning_rate": 0.0003961623124868145, + "loss": 0.6323, + "step": 191 + }, + { + "epoch": 0.37, + "learning_rate": 0.00039607778255183485, + "loss": 0.5821, + "step": 192 + }, + { + "epoch": 0.37, + "learning_rate": 0.0003959923410073174, + "loss": 0.6738, + "step": 193 + }, + { + "epoch": 0.37, + "learning_rate": 0.0003959059882504989, + "loss": 0.6203, + "step": 194 + }, + { + "epoch": 0.38, + "learning_rate": 0.00039581872468285277, + "loss": 0.632, + "step": 195 + }, + { + "epoch": 0.38, + "learning_rate": 0.0003957305507100868, + "loss": 0.5857, + "step": 196 + }, + { + "epoch": 0.38, + "learning_rate": 0.00039564146674214164, + "loss": 0.6311, + "step": 197 + }, + { + "epoch": 0.38, + "learning_rate": 0.0003955514731931885, + "loss": 0.5889, + "step": 198 + }, + { + "epoch": 0.38, + "learning_rate": 0.00039546057048162763, + "loss": 0.5201, + "step": 199 + }, + { + "epoch": 0.39, + "learning_rate": 0.00039536875903008607, + "loss": 0.5581, + "step": 200 + }, + { + "epoch": 0.39, + "learning_rate": 0.00039527603926541586, + "loss": 0.5104, + "step": 201 + }, + { + "epoch": 0.39, + "learning_rate": 0.00039518241161869193, + "loss": 0.5978, + "step": 202 + }, + { + "epoch": 0.39, + "learning_rate": 0.00039508787652521013, + "loss": 0.6244, + "step": 203 + }, + { + "epoch": 0.39, + "learning_rate": 0.00039499243442448536, + "loss": 0.589, + "step": 204 + }, + { + "epoch": 0.39, + "learning_rate": 0.0003948960857602493, + "loss": 0.575, + "step": 205 + }, + { + "epoch": 0.4, + "learning_rate": 0.0003947988309804485, + "loss": 0.5494, + "step": 206 + }, + { + "epoch": 0.4, + "learning_rate": 0.0003947006705372422, + "loss": 0.4895, + "step": 207 + }, + { + "epoch": 0.4, + "learning_rate": 0.00039460160488700036, + "loss": 0.5479, + "step": 208 + }, + { + "epoch": 0.4, + "learning_rate": 0.00039450163449030124, + "loss": 0.5893, + "step": 209 + }, + { + "epoch": 0.4, + "learning_rate": 0.0003944007598119297, + "loss": 0.5451, + "step": 210 + }, + { + "epoch": 0.41, + "learning_rate": 0.0003942989813208747, + "loss": 0.5582, + "step": 211 + }, + { + "epoch": 0.41, + "learning_rate": 0.0003941962994903273, + "loss": 0.5121, + "step": 212 + }, + { + "epoch": 0.41, + "learning_rate": 0.00039409271479767826, + "loss": 0.6324, + "step": 213 + }, + { + "epoch": 0.41, + "learning_rate": 0.000393988227724516, + "loss": 0.6118, + "step": 214 + }, + { + "epoch": 0.41, + "learning_rate": 0.0003938828387566244, + "loss": 0.6303, + "step": 215 + }, + { + "epoch": 0.42, + "learning_rate": 0.0003937765483839804, + "loss": 0.7705, + "step": 216 + }, + { + "epoch": 0.42, + "learning_rate": 0.0003936693571007517, + "loss": 0.6224, + "step": 217 + }, + { + "epoch": 0.42, + "learning_rate": 0.0003935612654052946, + "loss": 0.5664, + "step": 218 + }, + { + "epoch": 0.42, + "learning_rate": 0.00039345227380015163, + "loss": 0.66, + "step": 219 + }, + { + "epoch": 0.42, + "learning_rate": 0.00039334238279204906, + "loss": 0.5582, + "step": 220 + }, + { + "epoch": 0.43, + "learning_rate": 0.00039323159289189505, + "loss": 0.6087, + "step": 221 + }, + { + "epoch": 0.43, + "learning_rate": 0.0003931199046147764, + "loss": 0.5566, + "step": 222 + }, + { + "epoch": 0.43, + "learning_rate": 0.00039300731847995716, + "loss": 0.5775, + "step": 223 + }, + { + "epoch": 0.43, + "learning_rate": 0.00039289383501087534, + "loss": 0.5081, + "step": 224 + }, + { + "epoch": 0.43, + "learning_rate": 0.00039277945473514104, + "loss": 0.5218, + "step": 225 + }, + { + "epoch": 0.44, + "learning_rate": 0.0003926641781845338, + "loss": 0.6655, + "step": 226 + }, + { + "epoch": 0.44, + "learning_rate": 0.0003925480058950002, + "loss": 0.5735, + "step": 227 + }, + { + "epoch": 0.44, + "learning_rate": 0.00039243093840665114, + "loss": 0.6609, + "step": 228 + }, + { + "epoch": 0.44, + "learning_rate": 0.0003923129762637596, + "loss": 0.7323, + "step": 229 + }, + { + "epoch": 0.44, + "learning_rate": 0.000392194120014758, + "loss": 0.5703, + "step": 230 + }, + { + "epoch": 0.44, + "learning_rate": 0.00039207437021223583, + "loss": 0.6545, + "step": 231 + }, + { + "epoch": 0.45, + "learning_rate": 0.0003919537274129366, + "loss": 0.521, + "step": 232 + }, + { + "epoch": 0.45, + "learning_rate": 0.00039183219217775564, + "loss": 0.5257, + "step": 233 + }, + { + "epoch": 0.45, + "learning_rate": 0.0003917097650717377, + "loss": 0.5487, + "step": 234 + }, + { + "epoch": 0.45, + "learning_rate": 0.00039158644666407365, + "loss": 0.4861, + "step": 235 + }, + { + "epoch": 0.45, + "learning_rate": 0.00039146223752809845, + "loss": 0.4928, + "step": 236 + }, + { + "epoch": 0.46, + "learning_rate": 0.0003913371382412883, + "loss": 0.5253, + "step": 237 + }, + { + "epoch": 0.46, + "learning_rate": 0.00039121114938525756, + "loss": 0.6155, + "step": 238 + }, + { + "epoch": 0.46, + "learning_rate": 0.00039108427154575684, + "loss": 0.55, + "step": 239 + }, + { + "epoch": 0.46, + "learning_rate": 0.00039095650531266967, + "loss": 0.6617, + "step": 240 + }, + { + "epoch": 0.46, + "learning_rate": 0.00039082785128000976, + "loss": 0.5198, + "step": 241 + }, + { + "epoch": 0.47, + "learning_rate": 0.00039069831004591866, + "loss": 0.5302, + "step": 242 + }, + { + "epoch": 0.47, + "learning_rate": 0.0003905678822126625, + "loss": 0.5347, + "step": 243 + }, + { + "epoch": 0.47, + "learning_rate": 0.00039043656838662946, + "loss": 0.531, + "step": 244 + }, + { + "epoch": 0.47, + "learning_rate": 0.00039030436917832697, + "loss": 0.4884, + "step": 245 + }, + { + "epoch": 0.47, + "learning_rate": 0.00039017128520237883, + "loss": 0.6027, + "step": 246 + }, + { + "epoch": 0.48, + "learning_rate": 0.0003900373170775222, + "loss": 0.5537, + "step": 247 + }, + { + "epoch": 0.48, + "learning_rate": 0.00038990246542660494, + "loss": 0.5753, + "step": 248 + }, + { + "epoch": 0.48, + "learning_rate": 0.00038976673087658256, + "loss": 0.5059, + "step": 249 + }, + { + "epoch": 0.48, + "learning_rate": 0.00038963011405851537, + "loss": 0.5118, + "step": 250 + }, + { + "epoch": 0.48, + "learning_rate": 0.00038949261560756565, + "loss": 0.5645, + "step": 251 + }, + { + "epoch": 0.49, + "learning_rate": 0.0003893542361629944, + "loss": 0.5623, + "step": 252 + }, + { + "epoch": 0.49, + "learning_rate": 0.00038921497636815866, + "loss": 0.5216, + "step": 253 + }, + { + "epoch": 0.49, + "learning_rate": 0.0003890748368705085, + "loss": 0.4501, + "step": 254 + }, + { + "epoch": 0.49, + "learning_rate": 0.0003889338183215838, + "loss": 0.48, + "step": 255 + }, + { + "epoch": 0.49, + "learning_rate": 0.00038879192137701135, + "loss": 0.5218, + "step": 256 + }, + { + "epoch": 0.49, + "learning_rate": 0.0003886491466965018, + "loss": 0.5858, + "step": 257 + }, + { + "epoch": 0.5, + "learning_rate": 0.00038850549494384685, + "loss": 0.6124, + "step": 258 + }, + { + "epoch": 0.5, + "learning_rate": 0.00038836096678691536, + "loss": 0.4645, + "step": 259 + }, + { + "epoch": 0.5, + "learning_rate": 0.00038821556289765136, + "loss": 0.474, + "step": 260 + }, + { + "epoch": 0.5, + "learning_rate": 0.00038806928395207003, + "loss": 0.4364, + "step": 261 + }, + { + "epoch": 0.5, + "learning_rate": 0.00038792213063025484, + "loss": 0.5821, + "step": 262 + }, + { + "epoch": 0.51, + "learning_rate": 0.0003877741036163547, + "loss": 0.5393, + "step": 263 + }, + { + "epoch": 0.51, + "learning_rate": 0.0003876252035985804, + "loss": 0.5373, + "step": 264 + }, + { + "epoch": 0.51, + "learning_rate": 0.0003874754312692013, + "loss": 0.6021, + "step": 265 + }, + { + "epoch": 0.51, + "learning_rate": 0.0003873247873245426, + "loss": 0.4549, + "step": 266 + }, + { + "epoch": 0.51, + "learning_rate": 0.0003871732724649817, + "loss": 0.5994, + "step": 267 + }, + { + "epoch": 0.52, + "learning_rate": 0.0003870208873949453, + "loss": 0.4764, + "step": 268 + }, + { + "epoch": 0.52, + "learning_rate": 0.00038686763282290556, + "loss": 0.4311, + "step": 269 + }, + { + "epoch": 0.52, + "learning_rate": 0.0003867135094613774, + "loss": 0.5462, + "step": 270 + }, + { + "epoch": 0.52, + "learning_rate": 0.0003865585180269148, + "loss": 0.5006, + "step": 271 + }, + { + "epoch": 0.52, + "learning_rate": 0.0003864026592401076, + "loss": 0.5347, + "step": 272 + }, + { + "epoch": 0.53, + "learning_rate": 0.00038624593382557835, + "loss": 0.5242, + "step": 273 + }, + { + "epoch": 0.53, + "learning_rate": 0.00038608834251197856, + "loss": 0.5005, + "step": 274 + }, + { + "epoch": 0.53, + "learning_rate": 0.00038592988603198554, + "loss": 0.5436, + "step": 275 + }, + { + "epoch": 0.53, + "learning_rate": 0.000385770565122299, + "loss": 0.4658, + "step": 276 + }, + { + "epoch": 0.53, + "learning_rate": 0.0003856103805236375, + "loss": 0.5273, + "step": 277 + }, + { + "epoch": 0.54, + "learning_rate": 0.00038544933298073516, + "loss": 0.436, + "step": 278 + }, + { + "epoch": 0.54, + "learning_rate": 0.00038528742324233804, + "loss": 0.4785, + "step": 279 + }, + { + "epoch": 0.54, + "learning_rate": 0.00038512465206120086, + "loss": 0.5366, + "step": 280 + }, + { + "epoch": 0.54, + "learning_rate": 0.00038496102019408324, + "loss": 0.4448, + "step": 281 + }, + { + "epoch": 0.54, + "learning_rate": 0.00038479652840174637, + "loss": 0.5132, + "step": 282 + }, + { + "epoch": 0.54, + "learning_rate": 0.00038463117744894955, + "loss": 0.7918, + "step": 283 + }, + { + "epoch": 0.55, + "learning_rate": 0.00038446496810444627, + "loss": 0.5309, + "step": 284 + }, + { + "epoch": 0.55, + "learning_rate": 0.00038429790114098114, + "loss": 0.5316, + "step": 285 + }, + { + "epoch": 0.55, + "learning_rate": 0.00038412997733528576, + "loss": 0.4611, + "step": 286 + }, + { + "epoch": 0.55, + "learning_rate": 0.00038396119746807563, + "loss": 0.4609, + "step": 287 + }, + { + "epoch": 0.55, + "learning_rate": 0.00038379156232404613, + "loss": 0.5821, + "step": 288 + }, + { + "epoch": 0.56, + "learning_rate": 0.0003836210726918691, + "loss": 0.5883, + "step": 289 + }, + { + "epoch": 0.56, + "learning_rate": 0.0003834497293641889, + "loss": 0.5012, + "step": 290 + }, + { + "epoch": 0.56, + "learning_rate": 0.00038327753313761913, + "loss": 0.4457, + "step": 291 + }, + { + "epoch": 0.56, + "learning_rate": 0.00038310448481273867, + "loss": 0.4851, + "step": 292 + }, + { + "epoch": 0.56, + "learning_rate": 0.00038293058519408787, + "loss": 0.5622, + "step": 293 + }, + { + "epoch": 0.57, + "learning_rate": 0.00038275583509016507, + "loss": 0.5703, + "step": 294 + }, + { + "epoch": 0.57, + "learning_rate": 0.00038258023531342265, + "loss": 0.5718, + "step": 295 + }, + { + "epoch": 0.57, + "learning_rate": 0.0003824037866802632, + "loss": 0.5183, + "step": 296 + }, + { + "epoch": 0.57, + "learning_rate": 0.00038222649001103614, + "loss": 0.5085, + "step": 297 + }, + { + "epoch": 0.57, + "learning_rate": 0.00038204834613003323, + "loss": 0.5388, + "step": 298 + }, + { + "epoch": 0.58, + "learning_rate": 0.00038186935586548537, + "loss": 0.5425, + "step": 299 + }, + { + "epoch": 0.58, + "learning_rate": 0.0003816895200495584, + "loss": 0.447, + "step": 300 + }, + { + "epoch": 0.58, + "learning_rate": 0.0003815088395183493, + "loss": 0.5541, + "step": 301 + }, + { + "epoch": 0.58, + "learning_rate": 0.00038132731511188227, + "loss": 0.5518, + "step": 302 + }, + { + "epoch": 0.58, + "learning_rate": 0.000381144947674105, + "loss": 0.5074, + "step": 303 + }, + { + "epoch": 0.59, + "learning_rate": 0.0003809617380528847, + "loss": 0.5134, + "step": 304 + }, + { + "epoch": 0.59, + "learning_rate": 0.0003807776871000037, + "loss": 0.4599, + "step": 305 + }, + { + "epoch": 0.59, + "learning_rate": 0.0003805927956711562, + "loss": 0.5838, + "step": 306 + }, + { + "epoch": 0.59, + "learning_rate": 0.00038040706462594395, + "loss": 0.5216, + "step": 307 + }, + { + "epoch": 0.59, + "learning_rate": 0.00038022049482787216, + "loss": 0.5323, + "step": 308 + }, + { + "epoch": 0.6, + "learning_rate": 0.0003800330871443456, + "loss": 0.5681, + "step": 309 + }, + { + "epoch": 0.6, + "learning_rate": 0.00037984484244666446, + "loss": 0.4172, + "step": 310 + }, + { + "epoch": 0.6, + "learning_rate": 0.0003796557616100207, + "loss": 0.4958, + "step": 311 + }, + { + "epoch": 0.6, + "learning_rate": 0.0003794658455134934, + "loss": 0.662, + "step": 312 + }, + { + "epoch": 0.6, + "learning_rate": 0.0003792750950400451, + "loss": 0.5832, + "step": 313 + }, + { + "epoch": 0.6, + "learning_rate": 0.0003790835110765174, + "loss": 0.4271, + "step": 314 + }, + { + "epoch": 0.61, + "learning_rate": 0.0003788910945136271, + "loss": 0.4842, + "step": 315 + }, + { + "epoch": 0.61, + "learning_rate": 0.00037869784624596186, + "loss": 0.4656, + "step": 316 + }, + { + "epoch": 0.61, + "learning_rate": 0.00037850376717197626, + "loss": 0.4981, + "step": 317 + }, + { + "epoch": 0.61, + "learning_rate": 0.00037830885819398733, + "loss": 0.5162, + "step": 318 + }, + { + "epoch": 0.61, + "learning_rate": 0.00037811312021817067, + "loss": 0.652, + "step": 319 + }, + { + "epoch": 0.62, + "learning_rate": 0.0003779165541545558, + "loss": 0.5104, + "step": 320 + }, + { + "epoch": 0.62, + "learning_rate": 0.0003777191609170225, + "loss": 0.4971, + "step": 321 + }, + { + "epoch": 0.62, + "learning_rate": 0.0003775209414232962, + "loss": 0.4871, + "step": 322 + }, + { + "epoch": 0.62, + "learning_rate": 0.0003773218965949436, + "loss": 0.5226, + "step": 323 + }, + { + "epoch": 0.62, + "learning_rate": 0.00037712202735736884, + "loss": 0.4823, + "step": 324 + }, + { + "epoch": 0.63, + "learning_rate": 0.0003769213346398087, + "loss": 0.497, + "step": 325 + }, + { + "epoch": 0.63, + "learning_rate": 0.0003767198193753286, + "loss": 0.5976, + "step": 326 + }, + { + "epoch": 0.63, + "learning_rate": 0.0003765174825008181, + "loss": 0.4532, + "step": 327 + }, + { + "epoch": 0.63, + "learning_rate": 0.0003763143249569868, + "loss": 0.5236, + "step": 328 + }, + { + "epoch": 0.63, + "learning_rate": 0.00037611034768835947, + "loss": 0.6513, + "step": 329 + }, + { + "epoch": 0.64, + "learning_rate": 0.00037590555164327224, + "loss": 0.5686, + "step": 330 + }, + { + "epoch": 0.64, + "learning_rate": 0.00037569993777386774, + "loss": 0.456, + "step": 331 + }, + { + "epoch": 0.64, + "learning_rate": 0.0003754935070360909, + "loss": 0.5181, + "step": 332 + }, + { + "epoch": 0.64, + "learning_rate": 0.0003752862603896846, + "loss": 0.4765, + "step": 333 + }, + { + "epoch": 0.64, + "learning_rate": 0.00037507819879818477, + "loss": 0.5363, + "step": 334 + }, + { + "epoch": 0.65, + "learning_rate": 0.00037486932322891646, + "loss": 0.4584, + "step": 335 + }, + { + "epoch": 0.65, + "learning_rate": 0.00037465963465298886, + "loss": 0.5428, + "step": 336 + }, + { + "epoch": 0.65, + "learning_rate": 0.0003744491340452913, + "loss": 0.3927, + "step": 337 + }, + { + "epoch": 0.65, + "learning_rate": 0.0003742378223844882, + "loss": 0.5478, + "step": 338 + }, + { + "epoch": 0.65, + "learning_rate": 0.0003740257006530147, + "loss": 0.469, + "step": 339 + }, + { + "epoch": 0.65, + "learning_rate": 0.00037381276983707246, + "loss": 0.5169, + "step": 340 + }, + { + "epoch": 0.66, + "learning_rate": 0.00037359903092662434, + "loss": 0.4797, + "step": 341 + }, + { + "epoch": 0.66, + "learning_rate": 0.00037338448491539054, + "loss": 0.5315, + "step": 342 + }, + { + "epoch": 0.66, + "learning_rate": 0.00037316913280084353, + "loss": 0.4422, + "step": 343 + }, + { + "epoch": 0.66, + "learning_rate": 0.0003729529755842035, + "loss": 0.4426, + "step": 344 + }, + { + "epoch": 0.66, + "learning_rate": 0.0003727360142704337, + "loss": 0.4718, + "step": 345 + }, + { + "epoch": 0.67, + "learning_rate": 0.0003725182498682361, + "loss": 0.5585, + "step": 346 + }, + { + "epoch": 0.67, + "learning_rate": 0.0003722996833900459, + "loss": 0.4775, + "step": 347 + }, + { + "epoch": 0.67, + "learning_rate": 0.0003720803158520279, + "loss": 0.6014, + "step": 348 + }, + { + "epoch": 0.67, + "learning_rate": 0.00037186014827407076, + "loss": 0.5117, + "step": 349 + }, + { + "epoch": 0.67, + "learning_rate": 0.0003716391816797829, + "loss": 0.5404, + "step": 350 + }, + { + "epoch": 0.68, + "learning_rate": 0.0003714174170964876, + "loss": 0.527, + "step": 351 + }, + { + "epoch": 0.68, + "learning_rate": 0.00037119485555521796, + "loss": 0.4555, + "step": 352 + }, + { + "epoch": 0.68, + "learning_rate": 0.00037097149809071255, + "loss": 0.5372, + "step": 353 + }, + { + "epoch": 0.68, + "learning_rate": 0.00037074734574141016, + "loss": 0.5377, + "step": 354 + }, + { + "epoch": 0.68, + "learning_rate": 0.0003705223995494454, + "loss": 0.4925, + "step": 355 + }, + { + "epoch": 0.69, + "learning_rate": 0.00037029666056064345, + "loss": 0.482, + "step": 356 + }, + { + "epoch": 0.69, + "learning_rate": 0.00037007012982451546, + "loss": 0.5235, + "step": 357 + }, + { + "epoch": 0.69, + "learning_rate": 0.00036984280839425356, + "loss": 0.4957, + "step": 358 + }, + { + "epoch": 0.69, + "learning_rate": 0.000369614697326726, + "loss": 0.5379, + "step": 359 + }, + { + "epoch": 0.69, + "learning_rate": 0.0003693857976824721, + "loss": 0.4653, + "step": 360 + }, + { + "epoch": 0.7, + "learning_rate": 0.00036915611052569785, + "loss": 0.469, + "step": 361 + }, + { + "epoch": 0.7, + "learning_rate": 0.0003689256369242702, + "loss": 0.5618, + "step": 362 + }, + { + "epoch": 0.7, + "learning_rate": 0.0003686943779497124, + "loss": 0.4459, + "step": 363 + }, + { + "epoch": 0.7, + "learning_rate": 0.0003684623346771995, + "loss": 0.5606, + "step": 364 + }, + { + "epoch": 0.7, + "learning_rate": 0.0003682295081855524, + "loss": 0.4368, + "step": 365 + }, + { + "epoch": 0.7, + "learning_rate": 0.00036799589955723375, + "loss": 0.4168, + "step": 366 + }, + { + "epoch": 0.71, + "learning_rate": 0.00036776150987834243, + "loss": 0.4664, + "step": 367 + }, + { + "epoch": 0.71, + "learning_rate": 0.00036752634023860846, + "loss": 0.4737, + "step": 368 + }, + { + "epoch": 0.71, + "learning_rate": 0.0003672903917313883, + "loss": 0.4247, + "step": 369 + }, + { + "epoch": 0.71, + "learning_rate": 0.00036705366545365935, + "loss": 0.5677, + "step": 370 + }, + { + "epoch": 0.71, + "learning_rate": 0.00036681616250601505, + "loss": 0.5441, + "step": 371 + }, + { + "epoch": 0.72, + "learning_rate": 0.0003665778839926599, + "loss": 0.6247, + "step": 372 + }, + { + "epoch": 0.72, + "learning_rate": 0.00036633883102140405, + "loss": 0.5217, + "step": 373 + }, + { + "epoch": 0.72, + "learning_rate": 0.0003660990047036584, + "loss": 0.4651, + "step": 374 + }, + { + "epoch": 0.72, + "learning_rate": 0.0003658584061544291, + "loss": 0.4648, + "step": 375 + }, + { + "epoch": 0.72, + "learning_rate": 0.0003656170364923128, + "loss": 0.6048, + "step": 376 + }, + { + "epoch": 0.73, + "learning_rate": 0.00036537489683949114, + "loss": 0.4515, + "step": 377 + }, + { + "epoch": 0.73, + "learning_rate": 0.0003651319883217255, + "loss": 0.5096, + "step": 378 + }, + { + "epoch": 0.73, + "learning_rate": 0.00036488831206835207, + "loss": 0.4231, + "step": 379 + }, + { + "epoch": 0.73, + "learning_rate": 0.00036464386921227637, + "loss": 0.4903, + "step": 380 + }, + { + "epoch": 0.73, + "learning_rate": 0.00036439866088996796, + "loss": 0.5131, + "step": 381 + }, + { + "epoch": 0.74, + "learning_rate": 0.0003641526882414553, + "loss": 0.5986, + "step": 382 + }, + { + "epoch": 0.74, + "learning_rate": 0.0003639059524103203, + "loss": 0.6, + "step": 383 + }, + { + "epoch": 0.74, + "learning_rate": 0.0003636584545436931, + "loss": 0.5216, + "step": 384 + }, + { + "epoch": 0.74, + "learning_rate": 0.0003634101957922468, + "loss": 0.5144, + "step": 385 + }, + { + "epoch": 0.74, + "learning_rate": 0.00036316117731019184, + "loss": 0.4963, + "step": 386 + }, + { + "epoch": 0.75, + "learning_rate": 0.0003629114002552711, + "loss": 0.5657, + "step": 387 + }, + { + "epoch": 0.75, + "learning_rate": 0.00036266086578875384, + "loss": 0.5028, + "step": 388 + }, + { + "epoch": 0.75, + "learning_rate": 0.0003624095750754311, + "loss": 0.573, + "step": 389 + }, + { + "epoch": 0.75, + "learning_rate": 0.00036215752928360967, + "loss": 0.5199, + "step": 390 + }, + { + "epoch": 0.75, + "learning_rate": 0.0003619047295851068, + "loss": 0.656, + "step": 391 + }, + { + "epoch": 0.75, + "learning_rate": 0.00036165117715524506, + "loss": 0.5129, + "step": 392 + }, + { + "epoch": 0.76, + "learning_rate": 0.00036139687317284647, + "loss": 0.3945, + "step": 393 + }, + { + "epoch": 0.76, + "learning_rate": 0.0003611418188202271, + "loss": 0.5318, + "step": 394 + }, + { + "epoch": 0.76, + "learning_rate": 0.00036088601528319196, + "loss": 0.5344, + "step": 395 + }, + { + "epoch": 0.76, + "learning_rate": 0.00036062946375102885, + "loss": 0.5407, + "step": 396 + }, + { + "epoch": 0.76, + "learning_rate": 0.0003603721654165034, + "loss": 0.5364, + "step": 397 + }, + { + "epoch": 0.77, + "learning_rate": 0.00036011412147585306, + "loss": 0.5407, + "step": 398 + }, + { + "epoch": 0.77, + "learning_rate": 0.0003598553331287821, + "loss": 0.5999, + "step": 399 + }, + { + "epoch": 0.77, + "learning_rate": 0.0003595958015784555, + "loss": 0.624, + "step": 400 + }, + { + "epoch": 0.77, + "learning_rate": 0.00035933552803149354, + "loss": 0.5351, + "step": 401 + }, + { + "epoch": 0.77, + "learning_rate": 0.0003590745136979662, + "loss": 0.5196, + "step": 402 + }, + { + "epoch": 0.78, + "learning_rate": 0.00035881275979138765, + "loss": 0.5447, + "step": 403 + }, + { + "epoch": 0.78, + "learning_rate": 0.0003585502675287104, + "loss": 0.4908, + "step": 404 + }, + { + "epoch": 0.78, + "learning_rate": 0.00035828703813031986, + "loss": 0.5172, + "step": 405 + }, + { + "epoch": 0.78, + "learning_rate": 0.00035802307282002834, + "loss": 0.5923, + "step": 406 + }, + { + "epoch": 0.78, + "learning_rate": 0.0003577583728250699, + "loss": 0.568, + "step": 407 + }, + { + "epoch": 0.79, + "learning_rate": 0.00035749293937609395, + "loss": 0.4618, + "step": 408 + }, + { + "epoch": 0.79, + "learning_rate": 0.0003572267737071601, + "loss": 0.5351, + "step": 409 + }, + { + "epoch": 0.79, + "learning_rate": 0.0003569598770557322, + "loss": 0.5285, + "step": 410 + }, + { + "epoch": 0.79, + "learning_rate": 0.00035669225066267256, + "loss": 0.4571, + "step": 411 + }, + { + "epoch": 0.79, + "learning_rate": 0.00035642389577223625, + "loss": 0.4214, + "step": 412 + }, + { + "epoch": 0.8, + "learning_rate": 0.0003561548136320653, + "loss": 0.5393, + "step": 413 + }, + { + "epoch": 0.8, + "learning_rate": 0.0003558850054931828, + "loss": 0.549, + "step": 414 + }, + { + "epoch": 0.8, + "learning_rate": 0.00035561447260998714, + "loss": 0.4824, + "step": 415 + }, + { + "epoch": 0.8, + "learning_rate": 0.00035534321624024656, + "loss": 0.6244, + "step": 416 + }, + { + "epoch": 0.8, + "learning_rate": 0.00035507123764509245, + "loss": 0.5436, + "step": 417 + }, + { + "epoch": 0.8, + "learning_rate": 0.0003547985380890144, + "loss": 0.5198, + "step": 418 + }, + { + "epoch": 0.81, + "learning_rate": 0.00035452511883985366, + "loss": 0.5979, + "step": 419 + }, + { + "epoch": 0.81, + "learning_rate": 0.00035425098116879754, + "loss": 0.4158, + "step": 420 + }, + { + "epoch": 0.81, + "learning_rate": 0.00035397612635037356, + "loss": 0.5125, + "step": 421 + }, + { + "epoch": 0.81, + "learning_rate": 0.00035370055566244334, + "loss": 0.4699, + "step": 422 + }, + { + "epoch": 0.81, + "learning_rate": 0.0003534242703861966, + "loss": 0.5553, + "step": 423 + }, + { + "epoch": 0.82, + "learning_rate": 0.00035314727180614573, + "loss": 0.5969, + "step": 424 + }, + { + "epoch": 0.82, + "learning_rate": 0.00035286956121011897, + "loss": 0.456, + "step": 425 + }, + { + "epoch": 0.82, + "learning_rate": 0.0003525911398892552, + "loss": 0.5195, + "step": 426 + }, + { + "epoch": 0.82, + "learning_rate": 0.0003523120091379975, + "loss": 0.5187, + "step": 427 + }, + { + "epoch": 0.82, + "learning_rate": 0.00035203217025408726, + "loss": 0.5443, + "step": 428 + }, + { + "epoch": 0.83, + "learning_rate": 0.0003517516245385582, + "loss": 0.4476, + "step": 429 + }, + { + "epoch": 0.83, + "learning_rate": 0.0003514703732957301, + "loss": 0.5757, + "step": 430 + }, + { + "epoch": 0.83, + "learning_rate": 0.00035118841783320304, + "loss": 0.5129, + "step": 431 + }, + { + "epoch": 0.83, + "learning_rate": 0.00035090575946185114, + "loss": 0.6354, + "step": 432 + }, + { + "epoch": 0.83, + "learning_rate": 0.00035062239949581645, + "loss": 0.4065, + "step": 433 + }, + { + "epoch": 0.84, + "learning_rate": 0.000350338339252503, + "loss": 0.5472, + "step": 434 + }, + { + "epoch": 0.84, + "learning_rate": 0.00035005358005257045, + "loss": 0.5424, + "step": 435 + }, + { + "epoch": 0.84, + "learning_rate": 0.00034976812321992816, + "loss": 0.6127, + "step": 436 + }, + { + "epoch": 0.84, + "learning_rate": 0.00034948197008172877, + "loss": 0.63, + "step": 437 + }, + { + "epoch": 0.84, + "learning_rate": 0.0003491951219683625, + "loss": 0.413, + "step": 438 + }, + { + "epoch": 0.85, + "learning_rate": 0.00034890758021345034, + "loss": 0.5435, + "step": 439 + }, + { + "epoch": 0.85, + "learning_rate": 0.00034861934615383844, + "loss": 0.5433, + "step": 440 + }, + { + "epoch": 0.85, + "learning_rate": 0.00034833042112959153, + "loss": 0.4763, + "step": 441 + }, + { + "epoch": 0.85, + "learning_rate": 0.00034804080648398667, + "loss": 0.5727, + "step": 442 + }, + { + "epoch": 0.85, + "learning_rate": 0.00034775050356350727, + "loss": 0.5392, + "step": 443 + }, + { + "epoch": 0.85, + "learning_rate": 0.00034745951371783666, + "loss": 0.4981, + "step": 444 + }, + { + "epoch": 0.86, + "learning_rate": 0.0003471678382998518, + "loss": 0.5516, + "step": 445 + }, + { + "epoch": 0.86, + "learning_rate": 0.00034687547866561703, + "loss": 0.4965, + "step": 446 + }, + { + "epoch": 0.86, + "learning_rate": 0.0003465824361743779, + "loss": 0.4982, + "step": 447 + }, + { + "epoch": 0.86, + "learning_rate": 0.0003462887121885544, + "loss": 0.5619, + "step": 448 + }, + { + "epoch": 0.86, + "learning_rate": 0.0003459943080737353, + "loss": 0.5273, + "step": 449 + }, + { + "epoch": 0.87, + "learning_rate": 0.00034569922519867133, + "loss": 0.517, + "step": 450 + }, + { + "epoch": 0.87, + "learning_rate": 0.00034540346493526876, + "loss": 0.4874, + "step": 451 + }, + { + "epoch": 0.87, + "learning_rate": 0.0003451070286585833, + "loss": 0.5966, + "step": 452 + }, + { + "epoch": 0.87, + "learning_rate": 0.0003448099177468137, + "loss": 0.4487, + "step": 453 + }, + { + "epoch": 0.87, + "learning_rate": 0.0003445121335812951, + "loss": 0.5091, + "step": 454 + }, + { + "epoch": 0.88, + "learning_rate": 0.0003442136775464929, + "loss": 0.407, + "step": 455 + }, + { + "epoch": 0.88, + "learning_rate": 0.0003439145510299958, + "loss": 0.6327, + "step": 456 + }, + { + "epoch": 0.88, + "learning_rate": 0.00034361475542251025, + "loss": 0.4217, + "step": 457 + }, + { + "epoch": 0.88, + "learning_rate": 0.0003433142921178531, + "loss": 0.6102, + "step": 458 + }, + { + "epoch": 0.88, + "learning_rate": 0.0003430131625129456, + "loss": 0.5556, + "step": 459 + }, + { + "epoch": 0.89, + "learning_rate": 0.00034271136800780673, + "loss": 0.4986, + "step": 460 + }, + { + "epoch": 0.89, + "learning_rate": 0.0003424089100055467, + "loss": 0.5406, + "step": 461 + }, + { + "epoch": 0.89, + "learning_rate": 0.00034210578991236056, + "loss": 0.5881, + "step": 462 + }, + { + "epoch": 0.89, + "learning_rate": 0.00034180200913752157, + "loss": 0.4869, + "step": 463 + }, + { + "epoch": 0.89, + "learning_rate": 0.00034149756909337454, + "loss": 0.5626, + "step": 464 + }, + { + "epoch": 0.9, + "learning_rate": 0.0003411924711953295, + "loss": 0.564, + "step": 465 + }, + { + "epoch": 0.9, + "learning_rate": 0.00034088671686185486, + "loss": 0.6272, + "step": 466 + }, + { + "epoch": 0.9, + "learning_rate": 0.0003405803075144711, + "loss": 0.4643, + "step": 467 + }, + { + "epoch": 0.9, + "learning_rate": 0.0003402732445777438, + "loss": 0.5435, + "step": 468 + }, + { + "epoch": 0.9, + "learning_rate": 0.00033996552947927744, + "loss": 0.5844, + "step": 469 + }, + { + "epoch": 0.91, + "learning_rate": 0.0003396571636497084, + "loss": 0.5362, + "step": 470 + }, + { + "epoch": 0.91, + "learning_rate": 0.00033934814852269865, + "loss": 0.5607, + "step": 471 + }, + { + "epoch": 0.91, + "learning_rate": 0.0003390384855349285, + "loss": 0.4836, + "step": 472 + }, + { + "epoch": 0.91, + "learning_rate": 0.00033872817612609065, + "loss": 0.6555, + "step": 473 + }, + { + "epoch": 0.91, + "learning_rate": 0.00033841722173888315, + "loss": 0.4784, + "step": 474 + }, + { + "epoch": 0.91, + "learning_rate": 0.00033810562381900253, + "loss": 0.5583, + "step": 475 + }, + { + "epoch": 0.92, + "learning_rate": 0.00033779338381513736, + "loss": 0.4679, + "step": 476 + }, + { + "epoch": 0.92, + "learning_rate": 0.0003374805031789613, + "loss": 0.5325, + "step": 477 + }, + { + "epoch": 0.92, + "learning_rate": 0.00033716698336512654, + "loss": 0.6601, + "step": 478 + }, + { + "epoch": 0.92, + "learning_rate": 0.000336852825831257, + "loss": 0.4838, + "step": 479 + }, + { + "epoch": 0.92, + "learning_rate": 0.0003365380320379414, + "loss": 0.5588, + "step": 480 + }, + { + "epoch": 0.93, + "learning_rate": 0.00033622260344872665, + "loss": 0.4596, + "step": 481 + }, + { + "epoch": 0.93, + "learning_rate": 0.0003359065415301108, + "loss": 0.5228, + "step": 482 + }, + { + "epoch": 0.93, + "learning_rate": 0.00033558984775153663, + "loss": 0.5316, + "step": 483 + }, + { + "epoch": 0.93, + "learning_rate": 0.00033527252358538437, + "loss": 0.4761, + "step": 484 + }, + { + "epoch": 0.93, + "learning_rate": 0.0003349545705069653, + "loss": 0.5254, + "step": 485 + }, + { + "epoch": 0.94, + "learning_rate": 0.0003346359899945144, + "loss": 0.4786, + "step": 486 + }, + { + "epoch": 0.94, + "learning_rate": 0.00033431678352918384, + "loss": 0.4302, + "step": 487 + }, + { + "epoch": 0.94, + "learning_rate": 0.0003339969525950361, + "loss": 0.4914, + "step": 488 + }, + { + "epoch": 0.94, + "learning_rate": 0.00033367649867903663, + "loss": 0.4102, + "step": 489 + }, + { + "epoch": 0.94, + "learning_rate": 0.0003333554232710477, + "loss": 0.4698, + "step": 490 + }, + { + "epoch": 0.95, + "learning_rate": 0.0003330337278638207, + "loss": 0.4454, + "step": 491 + }, + { + "epoch": 0.95, + "learning_rate": 0.00033271141395298964, + "loss": 0.4648, + "step": 492 + }, + { + "epoch": 0.95, + "learning_rate": 0.00033238848303706415, + "loss": 0.4616, + "step": 493 + }, + { + "epoch": 0.95, + "learning_rate": 0.00033206493661742237, + "loss": 0.4861, + "step": 494 + }, + { + "epoch": 0.95, + "learning_rate": 0.00033174077619830416, + "loss": 0.4797, + "step": 495 + }, + { + "epoch": 0.96, + "learning_rate": 0.00033141600328680373, + "loss": 0.5104, + "step": 496 + }, + { + "epoch": 0.96, + "learning_rate": 0.00033109061939286336, + "loss": 0.5712, + "step": 497 + }, + { + "epoch": 0.96, + "learning_rate": 0.00033076462602926553, + "loss": 0.5425, + "step": 498 + }, + { + "epoch": 0.96, + "learning_rate": 0.00033043802471162636, + "loss": 0.6156, + "step": 499 + }, + { + "epoch": 0.96, + "learning_rate": 0.0003301108169583887, + "loss": 0.4282, + "step": 500 + }, + { + "epoch": 0.96, + "learning_rate": 0.0003297830042908146, + "loss": 0.4088, + "step": 501 + }, + { + "epoch": 0.97, + "learning_rate": 0.00032945458823297857, + "loss": 0.4866, + "step": 502 + }, + { + "epoch": 0.97, + "learning_rate": 0.0003291255703117605, + "loss": 0.5045, + "step": 503 + }, + { + "epoch": 0.97, + "learning_rate": 0.0003287959520568384, + "loss": 0.491, + "step": 504 + }, + { + "epoch": 0.97, + "learning_rate": 0.00032846573500068136, + "loss": 0.458, + "step": 505 + }, + { + "epoch": 0.97, + "learning_rate": 0.00032813492067854246, + "loss": 0.4508, + "step": 506 + }, + { + "epoch": 0.98, + "learning_rate": 0.0003278035106284516, + "loss": 0.4294, + "step": 507 + }, + { + "epoch": 0.98, + "learning_rate": 0.00032747150639120834, + "loss": 0.4834, + "step": 508 + }, + { + "epoch": 0.98, + "learning_rate": 0.00032713890951037477, + "loss": 0.3857, + "step": 509 + }, + { + "epoch": 0.98, + "learning_rate": 0.00032680572153226834, + "loss": 0.4072, + "step": 510 + }, + { + "epoch": 0.98, + "learning_rate": 0.0003264719440059545, + "loss": 0.4028, + "step": 511 + }, + { + "epoch": 0.99, + "learning_rate": 0.00032613757848323977, + "loss": 0.3789, + "step": 512 + }, + { + "epoch": 0.99, + "learning_rate": 0.00032580262651866446, + "loss": 0.4944, + "step": 513 + }, + { + "epoch": 0.99, + "learning_rate": 0.0003254670896694952, + "loss": 0.4259, + "step": 514 + }, + { + "epoch": 0.99, + "learning_rate": 0.00032513096949571805, + "loss": 0.5037, + "step": 515 + }, + { + "epoch": 0.99, + "learning_rate": 0.00032479426756003093, + "loss": 0.5857, + "step": 516 + }, + { + "epoch": 1.0, + "learning_rate": 0.0003244569854278366, + "loss": 0.5407, + "step": 517 + }, + { + "epoch": 1.0, + "learning_rate": 0.00032411912466723524, + "loss": 0.499, + "step": 518 + }, + { + "epoch": 1.0, + "learning_rate": 0.0003237806868490172, + "loss": 0.4359, + "step": 519 + }, + { + "epoch": 1.0, + "learning_rate": 0.00032344167354665573, + "loss": 0.4374, + "step": 520 + }, + { + "epoch": 1.0, + "learning_rate": 0.0003231020863362997, + "loss": 0.4172, + "step": 521 + }, + { + "epoch": 1.01, + "learning_rate": 0.000322761926796766, + "loss": 0.4451, + "step": 522 + }, + { + "epoch": 1.01, + "learning_rate": 0.0003224211965095326, + "loss": 0.4, + "step": 523 + }, + { + "epoch": 1.01, + "learning_rate": 0.0003220798970587309, + "loss": 0.4009, + "step": 524 + }, + { + "epoch": 1.01, + "learning_rate": 0.0003217380300311386, + "loss": 0.3966, + "step": 525 + }, + { + "epoch": 1.01, + "learning_rate": 0.000321395597016172, + "loss": 0.4255, + "step": 526 + }, + { + "epoch": 1.01, + "learning_rate": 0.00032105259960587895, + "loss": 0.4707, + "step": 527 + }, + { + "epoch": 1.02, + "learning_rate": 0.00032070903939493124, + "loss": 0.5313, + "step": 528 + }, + { + "epoch": 1.02, + "learning_rate": 0.0003203649179806172, + "loss": 0.3596, + "step": 529 + }, + { + "epoch": 1.02, + "learning_rate": 0.0003200202369628345, + "loss": 0.5223, + "step": 530 + }, + { + "epoch": 1.02, + "learning_rate": 0.00031967499794408234, + "loss": 0.4146, + "step": 531 + }, + { + "epoch": 1.02, + "learning_rate": 0.00031932920252945423, + "loss": 0.4328, + "step": 532 + }, + { + "epoch": 1.03, + "learning_rate": 0.0003189828523266306, + "loss": 0.4258, + "step": 533 + }, + { + "epoch": 1.03, + "learning_rate": 0.00031863594894587105, + "loss": 0.4457, + "step": 534 + }, + { + "epoch": 1.03, + "learning_rate": 0.0003182884940000072, + "loss": 0.5249, + "step": 535 + }, + { + "epoch": 1.03, + "learning_rate": 0.0003179404891044348, + "loss": 0.4751, + "step": 536 + }, + { + "epoch": 1.03, + "learning_rate": 0.00031759193587710676, + "loss": 0.5378, + "step": 537 + }, + { + "epoch": 1.04, + "learning_rate": 0.00031724283593852497, + "loss": 0.634, + "step": 538 + }, + { + "epoch": 1.04, + "learning_rate": 0.00031689319091173326, + "loss": 0.4298, + "step": 539 + }, + { + "epoch": 1.04, + "learning_rate": 0.00031654300242230977, + "loss": 0.5469, + "step": 540 + }, + { + "epoch": 1.04, + "learning_rate": 0.00031619227209835917, + "loss": 0.5153, + "step": 541 + }, + { + "epoch": 1.04, + "learning_rate": 0.0003158410015705053, + "loss": 0.4144, + "step": 542 + }, + { + "epoch": 1.05, + "learning_rate": 0.0003154891924718837, + "loss": 0.6041, + "step": 543 + }, + { + "epoch": 1.05, + "learning_rate": 0.0003151368464381335, + "loss": 0.4891, + "step": 544 + }, + { + "epoch": 1.05, + "learning_rate": 0.0003147839651073904, + "loss": 0.5258, + "step": 545 + }, + { + "epoch": 1.05, + "learning_rate": 0.00031443055012027874, + "loss": 0.4351, + "step": 546 + }, + { + "epoch": 1.05, + "learning_rate": 0.000314076603119904, + "loss": 0.4556, + "step": 547 + }, + { + "epoch": 1.06, + "learning_rate": 0.00031372212575184514, + "loss": 0.5445, + "step": 548 + }, + { + "epoch": 1.06, + "learning_rate": 0.00031336711966414675, + "loss": 0.5585, + "step": 549 + }, + { + "epoch": 1.06, + "learning_rate": 0.0003130115865073117, + "loss": 0.367, + "step": 550 + }, + { + "epoch": 1.06, + "learning_rate": 0.0003126555279342933, + "loss": 0.4877, + "step": 551 + }, + { + "epoch": 1.06, + "learning_rate": 0.0003122989456004876, + "loss": 0.4335, + "step": 552 + }, + { + "epoch": 1.07, + "learning_rate": 0.0003119418411637258, + "loss": 0.4383, + "step": 553 + }, + { + "epoch": 1.07, + "learning_rate": 0.0003115842162842663, + "loss": 0.4508, + "step": 554 + }, + { + "epoch": 1.07, + "learning_rate": 0.00031122607262478743, + "loss": 0.4631, + "step": 555 + }, + { + "epoch": 1.07, + "learning_rate": 0.0003108674118503793, + "loss": 0.3496, + "step": 556 + }, + { + "epoch": 1.07, + "learning_rate": 0.0003105082356285361, + "loss": 0.4108, + "step": 557 + }, + { + "epoch": 1.08, + "learning_rate": 0.0003101485456291486, + "loss": 0.4877, + "step": 558 + }, + { + "epoch": 1.08, + "learning_rate": 0.00030978834352449614, + "loss": 0.3696, + "step": 559 + }, + { + "epoch": 1.08, + "learning_rate": 0.00030942763098923913, + "loss": 0.5138, + "step": 560 + }, + { + "epoch": 1.08, + "learning_rate": 0.00030906640970041084, + "loss": 0.5961, + "step": 561 + }, + { + "epoch": 1.08, + "learning_rate": 0.0003087046813374099, + "loss": 0.3824, + "step": 562 + }, + { + "epoch": 1.08, + "learning_rate": 0.00030834244758199276, + "loss": 0.4925, + "step": 563 + }, + { + "epoch": 1.09, + "learning_rate": 0.000307979710118265, + "loss": 0.4511, + "step": 564 + }, + { + "epoch": 1.09, + "learning_rate": 0.00030761647063267457, + "loss": 0.4306, + "step": 565 + }, + { + "epoch": 1.09, + "learning_rate": 0.0003072527308140031, + "loss": 0.468, + "step": 566 + }, + { + "epoch": 1.09, + "learning_rate": 0.00030688849235335856, + "loss": 0.4842, + "step": 567 + }, + { + "epoch": 1.09, + "learning_rate": 0.0003065237569441671, + "loss": 0.4332, + "step": 568 + }, + { + "epoch": 1.1, + "learning_rate": 0.00030615852628216537, + "loss": 0.4637, + "step": 569 + }, + { + "epoch": 1.1, + "learning_rate": 0.0003057928020653925, + "loss": 0.6193, + "step": 570 + }, + { + "epoch": 1.1, + "learning_rate": 0.0003054265859941824, + "loss": 0.5033, + "step": 571 + }, + { + "epoch": 1.1, + "learning_rate": 0.00030505987977115555, + "loss": 0.4185, + "step": 572 + }, + { + "epoch": 1.1, + "learning_rate": 0.0003046926851012114, + "loss": 0.4211, + "step": 573 + }, + { + "epoch": 1.11, + "learning_rate": 0.0003043250036915201, + "loss": 0.5089, + "step": 574 + }, + { + "epoch": 1.11, + "learning_rate": 0.00030395683725151505, + "loss": 0.517, + "step": 575 + }, + { + "epoch": 1.11, + "learning_rate": 0.0003035881874928845, + "loss": 0.492, + "step": 576 + }, + { + "epoch": 1.11, + "learning_rate": 0.0003032190561295636, + "loss": 0.4535, + "step": 577 + }, + { + "epoch": 1.11, + "learning_rate": 0.0003028494448777269, + "loss": 0.3947, + "step": 578 + }, + { + "epoch": 1.12, + "learning_rate": 0.00030247935545577986, + "loss": 0.3125, + "step": 579 + }, + { + "epoch": 1.12, + "learning_rate": 0.0003021087895843511, + "loss": 0.3882, + "step": 580 + }, + { + "epoch": 1.12, + "learning_rate": 0.0003017377489862845, + "loss": 0.4802, + "step": 581 + }, + { + "epoch": 1.12, + "learning_rate": 0.00030136623538663083, + "loss": 0.4652, + "step": 582 + }, + { + "epoch": 1.12, + "learning_rate": 0.00030099425051263994, + "loss": 0.3816, + "step": 583 + }, + { + "epoch": 1.13, + "learning_rate": 0.0003006217960937529, + "loss": 0.4583, + "step": 584 + }, + { + "epoch": 1.13, + "learning_rate": 0.00030024887386159385, + "loss": 0.4568, + "step": 585 + }, + { + "epoch": 1.13, + "learning_rate": 0.00029987548554996174, + "loss": 0.3908, + "step": 586 + }, + { + "epoch": 1.13, + "learning_rate": 0.0002995016328948225, + "loss": 0.4235, + "step": 587 + }, + { + "epoch": 1.13, + "learning_rate": 0.00029912731763430075, + "loss": 0.4138, + "step": 588 + }, + { + "epoch": 1.13, + "learning_rate": 0.00029875254150867216, + "loss": 0.5838, + "step": 589 + }, + { + "epoch": 1.14, + "learning_rate": 0.0002983773062603548, + "loss": 0.462, + "step": 590 + }, + { + "epoch": 1.14, + "learning_rate": 0.00029800161363390145, + "loss": 0.4632, + "step": 591 + }, + { + "epoch": 1.14, + "learning_rate": 0.00029762546537599125, + "loss": 0.5898, + "step": 592 + }, + { + "epoch": 1.14, + "learning_rate": 0.0002972488632354218, + "loss": 0.4742, + "step": 593 + }, + { + "epoch": 1.14, + "learning_rate": 0.00029687180896310065, + "loss": 0.4579, + "step": 594 + }, + { + "epoch": 1.15, + "learning_rate": 0.0002964943043120378, + "loss": 0.5514, + "step": 595 + }, + { + "epoch": 1.15, + "learning_rate": 0.00029611635103733675, + "loss": 0.4304, + "step": 596 + }, + { + "epoch": 1.15, + "learning_rate": 0.0002957379508961871, + "loss": 0.4383, + "step": 597 + }, + { + "epoch": 1.15, + "learning_rate": 0.00029535910564785584, + "loss": 0.5327, + "step": 598 + }, + { + "epoch": 1.15, + "learning_rate": 0.00029497981705367933, + "loss": 0.4781, + "step": 599 + }, + { + "epoch": 1.16, + "learning_rate": 0.00029460008687705525, + "loss": 0.4178, + "step": 600 + } + ], + "logging_steps": 1, + "max_steps": 1557, + "num_train_epochs": 3, + "save_steps": 50, + "total_flos": 8.028786781032284e+17, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-600/training_args.bin b/checkpoint-600/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..cbdf20491848d40e9a89bca19c6229b4b2b55e5d --- /dev/null +++ b/checkpoint-600/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:07819caee47f45203545f962678d52b5954ff1fd4afe1d5152fad48004402099 +size 4155 diff --git a/checkpoint-650/README.md b/checkpoint-650/README.md new file mode 100644 index 0000000000000000000000000000000000000000..b32efe7366f05d1d90816d2ad9e4b06ccca46bea --- /dev/null +++ b/checkpoint-650/README.md @@ -0,0 +1,219 @@ +--- +library_name: peft +base_model: mistralai/Mistral-7B-v0.1 +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] + + +## Training procedure + + +The following `bitsandbytes` quantization config was used during training: +- quant_method: bitsandbytes +- load_in_8bit: False +- load_in_4bit: True +- llm_int8_threshold: 6.0 +- llm_int8_skip_modules: None +- llm_int8_enable_fp32_cpu_offload: False +- llm_int8_has_fp16_weight: False +- bnb_4bit_quant_type: nf4 +- bnb_4bit_use_double_quant: True +- bnb_4bit_compute_dtype: float16 + +### Framework versions + + +- PEFT 0.6.0.dev0 diff --git a/checkpoint-650/adapter_config.json b/checkpoint-650/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..4e108f2da037ef6250457c67a4bedd308d97303c --- /dev/null +++ b/checkpoint-650/adapter_config.json @@ -0,0 +1,24 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "mistralai/Mistral-7B-v0.1", + "bias": "none", + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layers_pattern": null, + "layers_to_transform": null, + "lora_alpha": 16, + "lora_dropout": 0.05, + "modules_to_save": null, + "peft_type": "LORA", + "r": 16, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "gate_proj", + "down_proj", + "up_proj" + ], + "task_type": "CAUSAL_LM" +} \ No newline at end of file diff --git a/checkpoint-650/adapter_model.bin b/checkpoint-650/adapter_model.bin new file mode 100644 index 0000000000000000000000000000000000000000..8f8773f20e472b1a46a60e0be925fbbb40c5e878 --- /dev/null +++ b/checkpoint-650/adapter_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9c4d4a963234d5b145db85e44f623296620990066a8818059db46bfb6fdae142 +size 113314765 diff --git a/checkpoint-650/optimizer.pt b/checkpoint-650/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..7a11a97b2ead753daf7f1a52bc6f49d8da251144 --- /dev/null +++ b/checkpoint-650/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:319fe643298cda15dc43201f86c588932cbb2e8ba93d626062a50a30db8ec3f8 +size 226653957 diff --git a/checkpoint-650/rng_state.pth b/checkpoint-650/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..0e0b6d57634584c89d5063aad9e41133446d7286 --- /dev/null +++ b/checkpoint-650/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8bcc6c883b3a4f7fb228632aa870378c86b9dd9a19866e7d6f881e4815d9a9b1 +size 14575 diff --git a/checkpoint-650/scheduler.pt b/checkpoint-650/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..2778d642a4ba0b823d14b608547f22e597c683f3 --- /dev/null +++ b/checkpoint-650/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6161499fb6777bbd8c4173364b8988e53a62d1e88cad4a1841153cca0b12c9b6 +size 627 diff --git a/checkpoint-650/trainer_state.json b/checkpoint-650/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..47453fa63c302f0a585790aa69958c381fb05e9e --- /dev/null +++ b/checkpoint-650/trainer_state.json @@ -0,0 +1,3919 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 1.2522565892405826, + "eval_steps": 500, + "global_step": 650, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0, + "learning_rate": 4.000000000000001e-06, + "loss": 0.6869, + "step": 1 + }, + { + "epoch": 0.0, + "learning_rate": 8.000000000000001e-06, + "loss": 0.8396, + "step": 2 + }, + { + "epoch": 0.01, + "learning_rate": 1.2e-05, + "loss": 0.7489, + "step": 3 + }, + { + "epoch": 0.01, + "learning_rate": 1.6000000000000003e-05, + "loss": 0.7252, + "step": 4 + }, + { + "epoch": 0.01, + "learning_rate": 2e-05, + "loss": 0.6548, + "step": 5 + }, + { + "epoch": 0.01, + "learning_rate": 2.4e-05, + "loss": 0.8022, + "step": 6 + }, + { + "epoch": 0.01, + "learning_rate": 2.8000000000000003e-05, + "loss": 0.6524, + "step": 7 + }, + { + "epoch": 0.02, + "learning_rate": 3.2000000000000005e-05, + "loss": 0.6981, + "step": 8 + }, + { + "epoch": 0.02, + "learning_rate": 3.6e-05, + "loss": 0.7488, + "step": 9 + }, + { + "epoch": 0.02, + "learning_rate": 4e-05, + "loss": 0.6368, + "step": 10 + }, + { + "epoch": 0.02, + "learning_rate": 4.4000000000000006e-05, + "loss": 0.6891, + "step": 11 + }, + { + "epoch": 0.02, + "learning_rate": 4.8e-05, + "loss": 0.7968, + "step": 12 + }, + { + "epoch": 0.03, + "learning_rate": 5.2000000000000004e-05, + "loss": 0.6912, + "step": 13 + }, + { + "epoch": 0.03, + "learning_rate": 5.6000000000000006e-05, + "loss": 0.8452, + "step": 14 + }, + { + "epoch": 0.03, + "learning_rate": 6e-05, + "loss": 0.6989, + "step": 15 + }, + { + "epoch": 0.03, + "learning_rate": 6.400000000000001e-05, + "loss": 0.6685, + "step": 16 + }, + { + "epoch": 0.03, + "learning_rate": 6.800000000000001e-05, + "loss": 0.5469, + "step": 17 + }, + { + "epoch": 0.03, + "learning_rate": 7.2e-05, + "loss": 0.7915, + "step": 18 + }, + { + "epoch": 0.04, + "learning_rate": 7.6e-05, + "loss": 0.7744, + "step": 19 + }, + { + "epoch": 0.04, + "learning_rate": 8e-05, + "loss": 0.6804, + "step": 20 + }, + { + "epoch": 0.04, + "learning_rate": 8.4e-05, + "loss": 0.7796, + "step": 21 + }, + { + "epoch": 0.04, + "learning_rate": 8.800000000000001e-05, + "loss": 0.706, + "step": 22 + }, + { + "epoch": 0.04, + "learning_rate": 9.200000000000001e-05, + "loss": 0.6798, + "step": 23 + }, + { + "epoch": 0.05, + "learning_rate": 9.6e-05, + "loss": 0.6333, + "step": 24 + }, + { + "epoch": 0.05, + "learning_rate": 0.0001, + "loss": 0.6012, + "step": 25 + }, + { + "epoch": 0.05, + "learning_rate": 0.00010400000000000001, + "loss": 0.52, + "step": 26 + }, + { + "epoch": 0.05, + "learning_rate": 0.00010800000000000001, + "loss": 0.6583, + "step": 27 + }, + { + "epoch": 0.05, + "learning_rate": 0.00011200000000000001, + "loss": 0.7354, + "step": 28 + }, + { + "epoch": 0.06, + "learning_rate": 0.000116, + "loss": 0.6296, + "step": 29 + }, + { + "epoch": 0.06, + "learning_rate": 0.00012, + "loss": 0.6352, + "step": 30 + }, + { + "epoch": 0.06, + "learning_rate": 0.000124, + "loss": 0.6007, + "step": 31 + }, + { + "epoch": 0.06, + "learning_rate": 0.00012800000000000002, + "loss": 0.5659, + "step": 32 + }, + { + "epoch": 0.06, + "learning_rate": 0.000132, + "loss": 0.5138, + "step": 33 + }, + { + "epoch": 0.07, + "learning_rate": 0.00013600000000000003, + "loss": 0.6639, + "step": 34 + }, + { + "epoch": 0.07, + "learning_rate": 0.00014, + "loss": 0.5934, + "step": 35 + }, + { + "epoch": 0.07, + "learning_rate": 0.000144, + "loss": 0.5233, + "step": 36 + }, + { + "epoch": 0.07, + "learning_rate": 0.000148, + "loss": 0.5307, + "step": 37 + }, + { + "epoch": 0.07, + "learning_rate": 0.000152, + "loss": 0.5928, + "step": 38 + }, + { + "epoch": 0.08, + "learning_rate": 0.00015600000000000002, + "loss": 0.5908, + "step": 39 + }, + { + "epoch": 0.08, + "learning_rate": 0.00016, + "loss": 0.6366, + "step": 40 + }, + { + "epoch": 0.08, + "learning_rate": 0.000164, + "loss": 0.5972, + "step": 41 + }, + { + "epoch": 0.08, + "learning_rate": 0.000168, + "loss": 0.4825, + "step": 42 + }, + { + "epoch": 0.08, + "learning_rate": 0.000172, + "loss": 0.6783, + "step": 43 + }, + { + "epoch": 0.08, + "learning_rate": 0.00017600000000000002, + "loss": 0.6082, + "step": 44 + }, + { + "epoch": 0.09, + "learning_rate": 0.00018, + "loss": 0.7633, + "step": 45 + }, + { + "epoch": 0.09, + "learning_rate": 0.00018400000000000003, + "loss": 0.5988, + "step": 46 + }, + { + "epoch": 0.09, + "learning_rate": 0.000188, + "loss": 0.6658, + "step": 47 + }, + { + "epoch": 0.09, + "learning_rate": 0.000192, + "loss": 0.5945, + "step": 48 + }, + { + "epoch": 0.09, + "learning_rate": 0.000196, + "loss": 0.5984, + "step": 49 + }, + { + "epoch": 0.1, + "learning_rate": 0.0002, + "loss": 0.6778, + "step": 50 + }, + { + "epoch": 0.1, + "learning_rate": 0.00020400000000000003, + "loss": 0.6057, + "step": 51 + }, + { + "epoch": 0.1, + "learning_rate": 0.00020800000000000001, + "loss": 0.601, + "step": 52 + }, + { + "epoch": 0.1, + "learning_rate": 0.00021200000000000003, + "loss": 0.5566, + "step": 53 + }, + { + "epoch": 0.1, + "learning_rate": 0.00021600000000000002, + "loss": 0.5911, + "step": 54 + }, + { + "epoch": 0.11, + "learning_rate": 0.00022000000000000003, + "loss": 0.7636, + "step": 55 + }, + { + "epoch": 0.11, + "learning_rate": 0.00022400000000000002, + "loss": 0.5537, + "step": 56 + }, + { + "epoch": 0.11, + "learning_rate": 0.00022799999999999999, + "loss": 0.6037, + "step": 57 + }, + { + "epoch": 0.11, + "learning_rate": 0.000232, + "loss": 0.6474, + "step": 58 + }, + { + "epoch": 0.11, + "learning_rate": 0.000236, + "loss": 0.6483, + "step": 59 + }, + { + "epoch": 0.12, + "learning_rate": 0.00024, + "loss": 0.5021, + "step": 60 + }, + { + "epoch": 0.12, + "learning_rate": 0.000244, + "loss": 0.5347, + "step": 61 + }, + { + "epoch": 0.12, + "learning_rate": 0.000248, + "loss": 0.5791, + "step": 62 + }, + { + "epoch": 0.12, + "learning_rate": 0.000252, + "loss": 0.5407, + "step": 63 + }, + { + "epoch": 0.12, + "learning_rate": 0.00025600000000000004, + "loss": 0.5298, + "step": 64 + }, + { + "epoch": 0.13, + "learning_rate": 0.00026000000000000003, + "loss": 0.5685, + "step": 65 + }, + { + "epoch": 0.13, + "learning_rate": 0.000264, + "loss": 0.5108, + "step": 66 + }, + { + "epoch": 0.13, + "learning_rate": 0.000268, + "loss": 0.526, + "step": 67 + }, + { + "epoch": 0.13, + "learning_rate": 0.00027200000000000005, + "loss": 0.6843, + "step": 68 + }, + { + "epoch": 0.13, + "learning_rate": 0.000276, + "loss": 0.6608, + "step": 69 + }, + { + "epoch": 0.13, + "learning_rate": 0.00028, + "loss": 0.5866, + "step": 70 + }, + { + "epoch": 0.14, + "learning_rate": 0.000284, + "loss": 0.6422, + "step": 71 + }, + { + "epoch": 0.14, + "learning_rate": 0.000288, + "loss": 0.449, + "step": 72 + }, + { + "epoch": 0.14, + "learning_rate": 0.000292, + "loss": 0.5319, + "step": 73 + }, + { + "epoch": 0.14, + "learning_rate": 0.000296, + "loss": 0.5977, + "step": 74 + }, + { + "epoch": 0.14, + "learning_rate": 0.00030000000000000003, + "loss": 0.5805, + "step": 75 + }, + { + "epoch": 0.15, + "learning_rate": 0.000304, + "loss": 0.5209, + "step": 76 + }, + { + "epoch": 0.15, + "learning_rate": 0.000308, + "loss": 0.6098, + "step": 77 + }, + { + "epoch": 0.15, + "learning_rate": 0.00031200000000000005, + "loss": 0.4665, + "step": 78 + }, + { + "epoch": 0.15, + "learning_rate": 0.00031600000000000004, + "loss": 0.6882, + "step": 79 + }, + { + "epoch": 0.15, + "learning_rate": 0.00032, + "loss": 0.5427, + "step": 80 + }, + { + "epoch": 0.16, + "learning_rate": 0.000324, + "loss": 0.5345, + "step": 81 + }, + { + "epoch": 0.16, + "learning_rate": 0.000328, + "loss": 0.663, + "step": 82 + }, + { + "epoch": 0.16, + "learning_rate": 0.000332, + "loss": 0.5393, + "step": 83 + }, + { + "epoch": 0.16, + "learning_rate": 0.000336, + "loss": 0.5711, + "step": 84 + }, + { + "epoch": 0.16, + "learning_rate": 0.00034, + "loss": 0.5261, + "step": 85 + }, + { + "epoch": 0.17, + "learning_rate": 0.000344, + "loss": 0.5775, + "step": 86 + }, + { + "epoch": 0.17, + "learning_rate": 0.000348, + "loss": 0.6329, + "step": 87 + }, + { + "epoch": 0.17, + "learning_rate": 0.00035200000000000005, + "loss": 0.4425, + "step": 88 + }, + { + "epoch": 0.17, + "learning_rate": 0.00035600000000000003, + "loss": 0.6837, + "step": 89 + }, + { + "epoch": 0.17, + "learning_rate": 0.00036, + "loss": 0.615, + "step": 90 + }, + { + "epoch": 0.18, + "learning_rate": 0.000364, + "loss": 0.5615, + "step": 91 + }, + { + "epoch": 0.18, + "learning_rate": 0.00036800000000000005, + "loss": 0.5434, + "step": 92 + }, + { + "epoch": 0.18, + "learning_rate": 0.00037200000000000004, + "loss": 0.5864, + "step": 93 + }, + { + "epoch": 0.18, + "learning_rate": 0.000376, + "loss": 0.5583, + "step": 94 + }, + { + "epoch": 0.18, + "learning_rate": 0.00038, + "loss": 0.5299, + "step": 95 + }, + { + "epoch": 0.18, + "learning_rate": 0.000384, + "loss": 0.532, + "step": 96 + }, + { + "epoch": 0.19, + "learning_rate": 0.000388, + "loss": 0.5227, + "step": 97 + }, + { + "epoch": 0.19, + "learning_rate": 0.000392, + "loss": 0.5275, + "step": 98 + }, + { + "epoch": 0.19, + "learning_rate": 0.00039600000000000003, + "loss": 0.4541, + "step": 99 + }, + { + "epoch": 0.19, + "learning_rate": 0.0004, + "loss": 0.6485, + "step": 100 + }, + { + "epoch": 0.19, + "learning_rate": 0.0003999995350775973, + "loss": 0.5438, + "step": 101 + }, + { + "epoch": 0.2, + "learning_rate": 0.00039999814031255063, + "loss": 0.5997, + "step": 102 + }, + { + "epoch": 0.2, + "learning_rate": 0.00039999581571134455, + "loss": 0.5322, + "step": 103 + }, + { + "epoch": 0.2, + "learning_rate": 0.0003999925612847867, + "loss": 0.484, + "step": 104 + }, + { + "epoch": 0.2, + "learning_rate": 0.00039998837704800766, + "loss": 0.5961, + "step": 105 + }, + { + "epoch": 0.2, + "learning_rate": 0.00039998326302046085, + "loss": 0.7405, + "step": 106 + }, + { + "epoch": 0.21, + "learning_rate": 0.00039997721922592255, + "loss": 0.5802, + "step": 107 + }, + { + "epoch": 0.21, + "learning_rate": 0.00039997024569249167, + "loss": 0.769, + "step": 108 + }, + { + "epoch": 0.21, + "learning_rate": 0.0003999623424525898, + "loss": 0.5598, + "step": 109 + }, + { + "epoch": 0.21, + "learning_rate": 0.0003999535095429608, + "loss": 0.6143, + "step": 110 + }, + { + "epoch": 0.21, + "learning_rate": 0.00039994374700467095, + "loss": 0.5766, + "step": 111 + }, + { + "epoch": 0.22, + "learning_rate": 0.00039993305488310836, + "loss": 0.7695, + "step": 112 + }, + { + "epoch": 0.22, + "learning_rate": 0.0003999214332279831, + "loss": 0.7153, + "step": 113 + }, + { + "epoch": 0.22, + "learning_rate": 0.0003999088820933269, + "loss": 0.5835, + "step": 114 + }, + { + "epoch": 0.22, + "learning_rate": 0.00039989540153749286, + "loss": 0.6634, + "step": 115 + }, + { + "epoch": 0.22, + "learning_rate": 0.000399880991623155, + "loss": 0.6069, + "step": 116 + }, + { + "epoch": 0.23, + "learning_rate": 0.0003998656524173082, + "loss": 0.7224, + "step": 117 + }, + { + "epoch": 0.23, + "learning_rate": 0.000399849383991268, + "loss": 0.5884, + "step": 118 + }, + { + "epoch": 0.23, + "learning_rate": 0.0003998321864206699, + "loss": 0.5122, + "step": 119 + }, + { + "epoch": 0.23, + "learning_rate": 0.00039981405978546924, + "loss": 0.6453, + "step": 120 + }, + { + "epoch": 0.23, + "learning_rate": 0.0003997950041699408, + "loss": 0.4665, + "step": 121 + }, + { + "epoch": 0.23, + "learning_rate": 0.0003997750196626785, + "loss": 0.5428, + "step": 122 + }, + { + "epoch": 0.24, + "learning_rate": 0.00039975410635659464, + "loss": 0.4365, + "step": 123 + }, + { + "epoch": 0.24, + "learning_rate": 0.00039973226434891995, + "loss": 0.5978, + "step": 124 + }, + { + "epoch": 0.24, + "learning_rate": 0.00039970949374120286, + "loss": 0.7729, + "step": 125 + }, + { + "epoch": 0.24, + "learning_rate": 0.000399685794639309, + "loss": 0.6212, + "step": 126 + }, + { + "epoch": 0.24, + "learning_rate": 0.00039966116715342066, + "loss": 0.5426, + "step": 127 + }, + { + "epoch": 0.25, + "learning_rate": 0.00039963561139803676, + "loss": 0.5782, + "step": 128 + }, + { + "epoch": 0.25, + "learning_rate": 0.0003996091274919716, + "loss": 0.6701, + "step": 129 + }, + { + "epoch": 0.25, + "learning_rate": 0.0003995817155583548, + "loss": 0.6314, + "step": 130 + }, + { + "epoch": 0.25, + "learning_rate": 0.0003995533757246307, + "loss": 0.6662, + "step": 131 + }, + { + "epoch": 0.25, + "learning_rate": 0.0003995241081225573, + "loss": 0.5192, + "step": 132 + }, + { + "epoch": 0.26, + "learning_rate": 0.0003994939128882065, + "loss": 0.5591, + "step": 133 + }, + { + "epoch": 0.26, + "learning_rate": 0.0003994627901619625, + "loss": 0.5809, + "step": 134 + }, + { + "epoch": 0.26, + "learning_rate": 0.0003994307400885219, + "loss": 0.4871, + "step": 135 + }, + { + "epoch": 0.26, + "learning_rate": 0.0003993977628168928, + "loss": 0.6666, + "step": 136 + }, + { + "epoch": 0.26, + "learning_rate": 0.0003993638585003938, + "loss": 0.6469, + "step": 137 + }, + { + "epoch": 0.27, + "learning_rate": 0.00039932902729665357, + "loss": 0.5727, + "step": 138 + }, + { + "epoch": 0.27, + "learning_rate": 0.00039929326936761036, + "loss": 0.6715, + "step": 139 + }, + { + "epoch": 0.27, + "learning_rate": 0.00039925658487951067, + "loss": 0.5686, + "step": 140 + }, + { + "epoch": 0.27, + "learning_rate": 0.00039921897400290894, + "loss": 0.501, + "step": 141 + }, + { + "epoch": 0.27, + "learning_rate": 0.00039918043691266665, + "loss": 0.5795, + "step": 142 + }, + { + "epoch": 0.28, + "learning_rate": 0.00039914097378795124, + "loss": 0.6287, + "step": 143 + }, + { + "epoch": 0.28, + "learning_rate": 0.00039910058481223564, + "loss": 0.7016, + "step": 144 + }, + { + "epoch": 0.28, + "learning_rate": 0.00039905927017329726, + "loss": 0.6232, + "step": 145 + }, + { + "epoch": 0.28, + "learning_rate": 0.00039901703006321715, + "loss": 0.5291, + "step": 146 + }, + { + "epoch": 0.28, + "learning_rate": 0.00039897386467837903, + "loss": 0.5297, + "step": 147 + }, + { + "epoch": 0.28, + "learning_rate": 0.00039892977421946844, + "loss": 0.5784, + "step": 148 + }, + { + "epoch": 0.29, + "learning_rate": 0.0003988847588914718, + "loss": 0.5714, + "step": 149 + }, + { + "epoch": 0.29, + "learning_rate": 0.0003988388189036754, + "loss": 0.5044, + "step": 150 + }, + { + "epoch": 0.29, + "learning_rate": 0.0003987919544696646, + "loss": 0.8246, + "step": 151 + }, + { + "epoch": 0.29, + "learning_rate": 0.0003987441658073226, + "loss": 0.5048, + "step": 152 + }, + { + "epoch": 0.29, + "learning_rate": 0.0003986954531388297, + "loss": 0.5433, + "step": 153 + }, + { + "epoch": 0.3, + "learning_rate": 0.00039864581669066186, + "loss": 0.5251, + "step": 154 + }, + { + "epoch": 0.3, + "learning_rate": 0.0003985952566935902, + "loss": 0.5708, + "step": 155 + }, + { + "epoch": 0.3, + "learning_rate": 0.00039854377338267936, + "loss": 0.6276, + "step": 156 + }, + { + "epoch": 0.3, + "learning_rate": 0.00039849136699728684, + "loss": 0.4915, + "step": 157 + }, + { + "epoch": 0.3, + "learning_rate": 0.0003984380377810617, + "loss": 0.6389, + "step": 158 + }, + { + "epoch": 0.31, + "learning_rate": 0.00039838378598194325, + "loss": 0.6067, + "step": 159 + }, + { + "epoch": 0.31, + "learning_rate": 0.00039832861185216045, + "loss": 0.6136, + "step": 160 + }, + { + "epoch": 0.31, + "learning_rate": 0.0003982725156482301, + "loss": 0.5597, + "step": 161 + }, + { + "epoch": 0.31, + "learning_rate": 0.000398215497630956, + "loss": 0.5957, + "step": 162 + }, + { + "epoch": 0.31, + "learning_rate": 0.0003981575580654278, + "loss": 0.5853, + "step": 163 + }, + { + "epoch": 0.32, + "learning_rate": 0.0003980986972210194, + "loss": 0.5462, + "step": 164 + }, + { + "epoch": 0.32, + "learning_rate": 0.0003980389153713881, + "loss": 0.5302, + "step": 165 + }, + { + "epoch": 0.32, + "learning_rate": 0.00039797821279447307, + "loss": 0.5395, + "step": 166 + }, + { + "epoch": 0.32, + "learning_rate": 0.00039791658977249425, + "loss": 0.7004, + "step": 167 + }, + { + "epoch": 0.32, + "learning_rate": 0.00039785404659195084, + "loss": 0.5622, + "step": 168 + }, + { + "epoch": 0.33, + "learning_rate": 0.00039779058354362013, + "loss": 0.5759, + "step": 169 + }, + { + "epoch": 0.33, + "learning_rate": 0.000397726200922556, + "loss": 0.6184, + "step": 170 + }, + { + "epoch": 0.33, + "learning_rate": 0.0003976608990280877, + "loss": 0.5488, + "step": 171 + }, + { + "epoch": 0.33, + "learning_rate": 0.0003975946781638183, + "loss": 0.6162, + "step": 172 + }, + { + "epoch": 0.33, + "learning_rate": 0.0003975275386376236, + "loss": 0.558, + "step": 173 + }, + { + "epoch": 0.34, + "learning_rate": 0.0003974594807616502, + "loss": 0.519, + "step": 174 + }, + { + "epoch": 0.34, + "learning_rate": 0.0003973905048523144, + "loss": 0.6195, + "step": 175 + }, + { + "epoch": 0.34, + "learning_rate": 0.00039732061123030064, + "loss": 0.5991, + "step": 176 + }, + { + "epoch": 0.34, + "learning_rate": 0.0003972498002205601, + "loss": 0.5428, + "step": 177 + }, + { + "epoch": 0.34, + "learning_rate": 0.00039717807215230896, + "loss": 0.5323, + "step": 178 + }, + { + "epoch": 0.34, + "learning_rate": 0.00039710542735902705, + "loss": 0.5307, + "step": 179 + }, + { + "epoch": 0.35, + "learning_rate": 0.0003970318661784564, + "loss": 0.5783, + "step": 180 + }, + { + "epoch": 0.35, + "learning_rate": 0.0003969573889525993, + "loss": 0.5924, + "step": 181 + }, + { + "epoch": 0.35, + "learning_rate": 0.00039688199602771714, + "loss": 0.5902, + "step": 182 + }, + { + "epoch": 0.35, + "learning_rate": 0.00039680568775432855, + "loss": 0.6291, + "step": 183 + }, + { + "epoch": 0.35, + "learning_rate": 0.0003967284644872077, + "loss": 0.5942, + "step": 184 + }, + { + "epoch": 0.36, + "learning_rate": 0.0003966503265853829, + "loss": 0.4878, + "step": 185 + }, + { + "epoch": 0.36, + "learning_rate": 0.0003965712744121347, + "loss": 0.6487, + "step": 186 + }, + { + "epoch": 0.36, + "learning_rate": 0.0003964913083349945, + "loss": 0.6111, + "step": 187 + }, + { + "epoch": 0.36, + "learning_rate": 0.00039641042872574233, + "loss": 0.6072, + "step": 188 + }, + { + "epoch": 0.36, + "learning_rate": 0.00039632863596040575, + "loss": 0.716, + "step": 189 + }, + { + "epoch": 0.37, + "learning_rate": 0.00039624593041925763, + "loss": 0.6178, + "step": 190 + }, + { + "epoch": 0.37, + "learning_rate": 0.0003961623124868145, + "loss": 0.6323, + "step": 191 + }, + { + "epoch": 0.37, + "learning_rate": 0.00039607778255183485, + "loss": 0.5821, + "step": 192 + }, + { + "epoch": 0.37, + "learning_rate": 0.0003959923410073174, + "loss": 0.6738, + "step": 193 + }, + { + "epoch": 0.37, + "learning_rate": 0.0003959059882504989, + "loss": 0.6203, + "step": 194 + }, + { + "epoch": 0.38, + "learning_rate": 0.00039581872468285277, + "loss": 0.632, + "step": 195 + }, + { + "epoch": 0.38, + "learning_rate": 0.0003957305507100868, + "loss": 0.5857, + "step": 196 + }, + { + "epoch": 0.38, + "learning_rate": 0.00039564146674214164, + "loss": 0.6311, + "step": 197 + }, + { + "epoch": 0.38, + "learning_rate": 0.0003955514731931885, + "loss": 0.5889, + "step": 198 + }, + { + "epoch": 0.38, + "learning_rate": 0.00039546057048162763, + "loss": 0.5201, + "step": 199 + }, + { + "epoch": 0.39, + "learning_rate": 0.00039536875903008607, + "loss": 0.5581, + "step": 200 + }, + { + "epoch": 0.39, + "learning_rate": 0.00039527603926541586, + "loss": 0.5104, + "step": 201 + }, + { + "epoch": 0.39, + "learning_rate": 0.00039518241161869193, + "loss": 0.5978, + "step": 202 + }, + { + "epoch": 0.39, + "learning_rate": 0.00039508787652521013, + "loss": 0.6244, + "step": 203 + }, + { + "epoch": 0.39, + "learning_rate": 0.00039499243442448536, + "loss": 0.589, + "step": 204 + }, + { + "epoch": 0.39, + "learning_rate": 0.0003948960857602493, + "loss": 0.575, + "step": 205 + }, + { + "epoch": 0.4, + "learning_rate": 0.0003947988309804485, + "loss": 0.5494, + "step": 206 + }, + { + "epoch": 0.4, + "learning_rate": 0.0003947006705372422, + "loss": 0.4895, + "step": 207 + }, + { + "epoch": 0.4, + "learning_rate": 0.00039460160488700036, + "loss": 0.5479, + "step": 208 + }, + { + "epoch": 0.4, + "learning_rate": 0.00039450163449030124, + "loss": 0.5893, + "step": 209 + }, + { + "epoch": 0.4, + "learning_rate": 0.0003944007598119297, + "loss": 0.5451, + "step": 210 + }, + { + "epoch": 0.41, + "learning_rate": 0.0003942989813208747, + "loss": 0.5582, + "step": 211 + }, + { + "epoch": 0.41, + "learning_rate": 0.0003941962994903273, + "loss": 0.5121, + "step": 212 + }, + { + "epoch": 0.41, + "learning_rate": 0.00039409271479767826, + "loss": 0.6324, + "step": 213 + }, + { + "epoch": 0.41, + "learning_rate": 0.000393988227724516, + "loss": 0.6118, + "step": 214 + }, + { + "epoch": 0.41, + "learning_rate": 0.0003938828387566244, + "loss": 0.6303, + "step": 215 + }, + { + "epoch": 0.42, + "learning_rate": 0.0003937765483839804, + "loss": 0.7705, + "step": 216 + }, + { + "epoch": 0.42, + "learning_rate": 0.0003936693571007517, + "loss": 0.6224, + "step": 217 + }, + { + "epoch": 0.42, + "learning_rate": 0.0003935612654052946, + "loss": 0.5664, + "step": 218 + }, + { + "epoch": 0.42, + "learning_rate": 0.00039345227380015163, + "loss": 0.66, + "step": 219 + }, + { + "epoch": 0.42, + "learning_rate": 0.00039334238279204906, + "loss": 0.5582, + "step": 220 + }, + { + "epoch": 0.43, + "learning_rate": 0.00039323159289189505, + "loss": 0.6087, + "step": 221 + }, + { + "epoch": 0.43, + "learning_rate": 0.0003931199046147764, + "loss": 0.5566, + "step": 222 + }, + { + "epoch": 0.43, + "learning_rate": 0.00039300731847995716, + "loss": 0.5775, + "step": 223 + }, + { + "epoch": 0.43, + "learning_rate": 0.00039289383501087534, + "loss": 0.5081, + "step": 224 + }, + { + "epoch": 0.43, + "learning_rate": 0.00039277945473514104, + "loss": 0.5218, + "step": 225 + }, + { + "epoch": 0.44, + "learning_rate": 0.0003926641781845338, + "loss": 0.6655, + "step": 226 + }, + { + "epoch": 0.44, + "learning_rate": 0.0003925480058950002, + "loss": 0.5735, + "step": 227 + }, + { + "epoch": 0.44, + "learning_rate": 0.00039243093840665114, + "loss": 0.6609, + "step": 228 + }, + { + "epoch": 0.44, + "learning_rate": 0.0003923129762637596, + "loss": 0.7323, + "step": 229 + }, + { + "epoch": 0.44, + "learning_rate": 0.000392194120014758, + "loss": 0.5703, + "step": 230 + }, + { + "epoch": 0.44, + "learning_rate": 0.00039207437021223583, + "loss": 0.6545, + "step": 231 + }, + { + "epoch": 0.45, + "learning_rate": 0.0003919537274129366, + "loss": 0.521, + "step": 232 + }, + { + "epoch": 0.45, + "learning_rate": 0.00039183219217775564, + "loss": 0.5257, + "step": 233 + }, + { + "epoch": 0.45, + "learning_rate": 0.0003917097650717377, + "loss": 0.5487, + "step": 234 + }, + { + "epoch": 0.45, + "learning_rate": 0.00039158644666407365, + "loss": 0.4861, + "step": 235 + }, + { + "epoch": 0.45, + "learning_rate": 0.00039146223752809845, + "loss": 0.4928, + "step": 236 + }, + { + "epoch": 0.46, + "learning_rate": 0.0003913371382412883, + "loss": 0.5253, + "step": 237 + }, + { + "epoch": 0.46, + "learning_rate": 0.00039121114938525756, + "loss": 0.6155, + "step": 238 + }, + { + "epoch": 0.46, + "learning_rate": 0.00039108427154575684, + "loss": 0.55, + "step": 239 + }, + { + "epoch": 0.46, + "learning_rate": 0.00039095650531266967, + "loss": 0.6617, + "step": 240 + }, + { + "epoch": 0.46, + "learning_rate": 0.00039082785128000976, + "loss": 0.5198, + "step": 241 + }, + { + "epoch": 0.47, + "learning_rate": 0.00039069831004591866, + "loss": 0.5302, + "step": 242 + }, + { + "epoch": 0.47, + "learning_rate": 0.0003905678822126625, + "loss": 0.5347, + "step": 243 + }, + { + "epoch": 0.47, + "learning_rate": 0.00039043656838662946, + "loss": 0.531, + "step": 244 + }, + { + "epoch": 0.47, + "learning_rate": 0.00039030436917832697, + "loss": 0.4884, + "step": 245 + }, + { + "epoch": 0.47, + "learning_rate": 0.00039017128520237883, + "loss": 0.6027, + "step": 246 + }, + { + "epoch": 0.48, + "learning_rate": 0.0003900373170775222, + "loss": 0.5537, + "step": 247 + }, + { + "epoch": 0.48, + "learning_rate": 0.00038990246542660494, + "loss": 0.5753, + "step": 248 + }, + { + "epoch": 0.48, + "learning_rate": 0.00038976673087658256, + "loss": 0.5059, + "step": 249 + }, + { + "epoch": 0.48, + "learning_rate": 0.00038963011405851537, + "loss": 0.5118, + "step": 250 + }, + { + "epoch": 0.48, + "learning_rate": 0.00038949261560756565, + "loss": 0.5645, + "step": 251 + }, + { + "epoch": 0.49, + "learning_rate": 0.0003893542361629944, + "loss": 0.5623, + "step": 252 + }, + { + "epoch": 0.49, + "learning_rate": 0.00038921497636815866, + "loss": 0.5216, + "step": 253 + }, + { + "epoch": 0.49, + "learning_rate": 0.0003890748368705085, + "loss": 0.4501, + "step": 254 + }, + { + "epoch": 0.49, + "learning_rate": 0.0003889338183215838, + "loss": 0.48, + "step": 255 + }, + { + "epoch": 0.49, + "learning_rate": 0.00038879192137701135, + "loss": 0.5218, + "step": 256 + }, + { + "epoch": 0.49, + "learning_rate": 0.0003886491466965018, + "loss": 0.5858, + "step": 257 + }, + { + "epoch": 0.5, + "learning_rate": 0.00038850549494384685, + "loss": 0.6124, + "step": 258 + }, + { + "epoch": 0.5, + "learning_rate": 0.00038836096678691536, + "loss": 0.4645, + "step": 259 + }, + { + "epoch": 0.5, + "learning_rate": 0.00038821556289765136, + "loss": 0.474, + "step": 260 + }, + { + "epoch": 0.5, + "learning_rate": 0.00038806928395207003, + "loss": 0.4364, + "step": 261 + }, + { + "epoch": 0.5, + "learning_rate": 0.00038792213063025484, + "loss": 0.5821, + "step": 262 + }, + { + "epoch": 0.51, + "learning_rate": 0.0003877741036163547, + "loss": 0.5393, + "step": 263 + }, + { + "epoch": 0.51, + "learning_rate": 0.0003876252035985804, + "loss": 0.5373, + "step": 264 + }, + { + "epoch": 0.51, + "learning_rate": 0.0003874754312692013, + "loss": 0.6021, + "step": 265 + }, + { + "epoch": 0.51, + "learning_rate": 0.0003873247873245426, + "loss": 0.4549, + "step": 266 + }, + { + "epoch": 0.51, + "learning_rate": 0.0003871732724649817, + "loss": 0.5994, + "step": 267 + }, + { + "epoch": 0.52, + "learning_rate": 0.0003870208873949453, + "loss": 0.4764, + "step": 268 + }, + { + "epoch": 0.52, + "learning_rate": 0.00038686763282290556, + "loss": 0.4311, + "step": 269 + }, + { + "epoch": 0.52, + "learning_rate": 0.0003867135094613774, + "loss": 0.5462, + "step": 270 + }, + { + "epoch": 0.52, + "learning_rate": 0.0003865585180269148, + "loss": 0.5006, + "step": 271 + }, + { + "epoch": 0.52, + "learning_rate": 0.0003864026592401076, + "loss": 0.5347, + "step": 272 + }, + { + "epoch": 0.53, + "learning_rate": 0.00038624593382557835, + "loss": 0.5242, + "step": 273 + }, + { + "epoch": 0.53, + "learning_rate": 0.00038608834251197856, + "loss": 0.5005, + "step": 274 + }, + { + "epoch": 0.53, + "learning_rate": 0.00038592988603198554, + "loss": 0.5436, + "step": 275 + }, + { + "epoch": 0.53, + "learning_rate": 0.000385770565122299, + "loss": 0.4658, + "step": 276 + }, + { + "epoch": 0.53, + "learning_rate": 0.0003856103805236375, + "loss": 0.5273, + "step": 277 + }, + { + "epoch": 0.54, + "learning_rate": 0.00038544933298073516, + "loss": 0.436, + "step": 278 + }, + { + "epoch": 0.54, + "learning_rate": 0.00038528742324233804, + "loss": 0.4785, + "step": 279 + }, + { + "epoch": 0.54, + "learning_rate": 0.00038512465206120086, + "loss": 0.5366, + "step": 280 + }, + { + "epoch": 0.54, + "learning_rate": 0.00038496102019408324, + "loss": 0.4448, + "step": 281 + }, + { + "epoch": 0.54, + "learning_rate": 0.00038479652840174637, + "loss": 0.5132, + "step": 282 + }, + { + "epoch": 0.54, + "learning_rate": 0.00038463117744894955, + "loss": 0.7918, + "step": 283 + }, + { + "epoch": 0.55, + "learning_rate": 0.00038446496810444627, + "loss": 0.5309, + "step": 284 + }, + { + "epoch": 0.55, + "learning_rate": 0.00038429790114098114, + "loss": 0.5316, + "step": 285 + }, + { + "epoch": 0.55, + "learning_rate": 0.00038412997733528576, + "loss": 0.4611, + "step": 286 + }, + { + "epoch": 0.55, + "learning_rate": 0.00038396119746807563, + "loss": 0.4609, + "step": 287 + }, + { + "epoch": 0.55, + "learning_rate": 0.00038379156232404613, + "loss": 0.5821, + "step": 288 + }, + { + "epoch": 0.56, + "learning_rate": 0.0003836210726918691, + "loss": 0.5883, + "step": 289 + }, + { + "epoch": 0.56, + "learning_rate": 0.0003834497293641889, + "loss": 0.5012, + "step": 290 + }, + { + "epoch": 0.56, + "learning_rate": 0.00038327753313761913, + "loss": 0.4457, + "step": 291 + }, + { + "epoch": 0.56, + "learning_rate": 0.00038310448481273867, + "loss": 0.4851, + "step": 292 + }, + { + "epoch": 0.56, + "learning_rate": 0.00038293058519408787, + "loss": 0.5622, + "step": 293 + }, + { + "epoch": 0.57, + "learning_rate": 0.00038275583509016507, + "loss": 0.5703, + "step": 294 + }, + { + "epoch": 0.57, + "learning_rate": 0.00038258023531342265, + "loss": 0.5718, + "step": 295 + }, + { + "epoch": 0.57, + "learning_rate": 0.0003824037866802632, + "loss": 0.5183, + "step": 296 + }, + { + "epoch": 0.57, + "learning_rate": 0.00038222649001103614, + "loss": 0.5085, + "step": 297 + }, + { + "epoch": 0.57, + "learning_rate": 0.00038204834613003323, + "loss": 0.5388, + "step": 298 + }, + { + "epoch": 0.58, + "learning_rate": 0.00038186935586548537, + "loss": 0.5425, + "step": 299 + }, + { + "epoch": 0.58, + "learning_rate": 0.0003816895200495584, + "loss": 0.447, + "step": 300 + }, + { + "epoch": 0.58, + "learning_rate": 0.0003815088395183493, + "loss": 0.5541, + "step": 301 + }, + { + "epoch": 0.58, + "learning_rate": 0.00038132731511188227, + "loss": 0.5518, + "step": 302 + }, + { + "epoch": 0.58, + "learning_rate": 0.000381144947674105, + "loss": 0.5074, + "step": 303 + }, + { + "epoch": 0.59, + "learning_rate": 0.0003809617380528847, + "loss": 0.5134, + "step": 304 + }, + { + "epoch": 0.59, + "learning_rate": 0.0003807776871000037, + "loss": 0.4599, + "step": 305 + }, + { + "epoch": 0.59, + "learning_rate": 0.0003805927956711562, + "loss": 0.5838, + "step": 306 + }, + { + "epoch": 0.59, + "learning_rate": 0.00038040706462594395, + "loss": 0.5216, + "step": 307 + }, + { + "epoch": 0.59, + "learning_rate": 0.00038022049482787216, + "loss": 0.5323, + "step": 308 + }, + { + "epoch": 0.6, + "learning_rate": 0.0003800330871443456, + "loss": 0.5681, + "step": 309 + }, + { + "epoch": 0.6, + "learning_rate": 0.00037984484244666446, + "loss": 0.4172, + "step": 310 + }, + { + "epoch": 0.6, + "learning_rate": 0.0003796557616100207, + "loss": 0.4958, + "step": 311 + }, + { + "epoch": 0.6, + "learning_rate": 0.0003794658455134934, + "loss": 0.662, + "step": 312 + }, + { + "epoch": 0.6, + "learning_rate": 0.0003792750950400451, + "loss": 0.5832, + "step": 313 + }, + { + "epoch": 0.6, + "learning_rate": 0.0003790835110765174, + "loss": 0.4271, + "step": 314 + }, + { + "epoch": 0.61, + "learning_rate": 0.0003788910945136271, + "loss": 0.4842, + "step": 315 + }, + { + "epoch": 0.61, + "learning_rate": 0.00037869784624596186, + "loss": 0.4656, + "step": 316 + }, + { + "epoch": 0.61, + "learning_rate": 0.00037850376717197626, + "loss": 0.4981, + "step": 317 + }, + { + "epoch": 0.61, + "learning_rate": 0.00037830885819398733, + "loss": 0.5162, + "step": 318 + }, + { + "epoch": 0.61, + "learning_rate": 0.00037811312021817067, + "loss": 0.652, + "step": 319 + }, + { + "epoch": 0.62, + "learning_rate": 0.0003779165541545558, + "loss": 0.5104, + "step": 320 + }, + { + "epoch": 0.62, + "learning_rate": 0.0003777191609170225, + "loss": 0.4971, + "step": 321 + }, + { + "epoch": 0.62, + "learning_rate": 0.0003775209414232962, + "loss": 0.4871, + "step": 322 + }, + { + "epoch": 0.62, + "learning_rate": 0.0003773218965949436, + "loss": 0.5226, + "step": 323 + }, + { + "epoch": 0.62, + "learning_rate": 0.00037712202735736884, + "loss": 0.4823, + "step": 324 + }, + { + "epoch": 0.63, + "learning_rate": 0.0003769213346398087, + "loss": 0.497, + "step": 325 + }, + { + "epoch": 0.63, + "learning_rate": 0.0003767198193753286, + "loss": 0.5976, + "step": 326 + }, + { + "epoch": 0.63, + "learning_rate": 0.0003765174825008181, + "loss": 0.4532, + "step": 327 + }, + { + "epoch": 0.63, + "learning_rate": 0.0003763143249569868, + "loss": 0.5236, + "step": 328 + }, + { + "epoch": 0.63, + "learning_rate": 0.00037611034768835947, + "loss": 0.6513, + "step": 329 + }, + { + "epoch": 0.64, + "learning_rate": 0.00037590555164327224, + "loss": 0.5686, + "step": 330 + }, + { + "epoch": 0.64, + "learning_rate": 0.00037569993777386774, + "loss": 0.456, + "step": 331 + }, + { + "epoch": 0.64, + "learning_rate": 0.0003754935070360909, + "loss": 0.5181, + "step": 332 + }, + { + "epoch": 0.64, + "learning_rate": 0.0003752862603896846, + "loss": 0.4765, + "step": 333 + }, + { + "epoch": 0.64, + "learning_rate": 0.00037507819879818477, + "loss": 0.5363, + "step": 334 + }, + { + "epoch": 0.65, + "learning_rate": 0.00037486932322891646, + "loss": 0.4584, + "step": 335 + }, + { + "epoch": 0.65, + "learning_rate": 0.00037465963465298886, + "loss": 0.5428, + "step": 336 + }, + { + "epoch": 0.65, + "learning_rate": 0.0003744491340452913, + "loss": 0.3927, + "step": 337 + }, + { + "epoch": 0.65, + "learning_rate": 0.0003742378223844882, + "loss": 0.5478, + "step": 338 + }, + { + "epoch": 0.65, + "learning_rate": 0.0003740257006530147, + "loss": 0.469, + "step": 339 + }, + { + "epoch": 0.65, + "learning_rate": 0.00037381276983707246, + "loss": 0.5169, + "step": 340 + }, + { + "epoch": 0.66, + "learning_rate": 0.00037359903092662434, + "loss": 0.4797, + "step": 341 + }, + { + "epoch": 0.66, + "learning_rate": 0.00037338448491539054, + "loss": 0.5315, + "step": 342 + }, + { + "epoch": 0.66, + "learning_rate": 0.00037316913280084353, + "loss": 0.4422, + "step": 343 + }, + { + "epoch": 0.66, + "learning_rate": 0.0003729529755842035, + "loss": 0.4426, + "step": 344 + }, + { + "epoch": 0.66, + "learning_rate": 0.0003727360142704337, + "loss": 0.4718, + "step": 345 + }, + { + "epoch": 0.67, + "learning_rate": 0.0003725182498682361, + "loss": 0.5585, + "step": 346 + }, + { + "epoch": 0.67, + "learning_rate": 0.0003722996833900459, + "loss": 0.4775, + "step": 347 + }, + { + "epoch": 0.67, + "learning_rate": 0.0003720803158520279, + "loss": 0.6014, + "step": 348 + }, + { + "epoch": 0.67, + "learning_rate": 0.00037186014827407076, + "loss": 0.5117, + "step": 349 + }, + { + "epoch": 0.67, + "learning_rate": 0.0003716391816797829, + "loss": 0.5404, + "step": 350 + }, + { + "epoch": 0.68, + "learning_rate": 0.0003714174170964876, + "loss": 0.527, + "step": 351 + }, + { + "epoch": 0.68, + "learning_rate": 0.00037119485555521796, + "loss": 0.4555, + "step": 352 + }, + { + "epoch": 0.68, + "learning_rate": 0.00037097149809071255, + "loss": 0.5372, + "step": 353 + }, + { + "epoch": 0.68, + "learning_rate": 0.00037074734574141016, + "loss": 0.5377, + "step": 354 + }, + { + "epoch": 0.68, + "learning_rate": 0.0003705223995494454, + "loss": 0.4925, + "step": 355 + }, + { + "epoch": 0.69, + "learning_rate": 0.00037029666056064345, + "loss": 0.482, + "step": 356 + }, + { + "epoch": 0.69, + "learning_rate": 0.00037007012982451546, + "loss": 0.5235, + "step": 357 + }, + { + "epoch": 0.69, + "learning_rate": 0.00036984280839425356, + "loss": 0.4957, + "step": 358 + }, + { + "epoch": 0.69, + "learning_rate": 0.000369614697326726, + "loss": 0.5379, + "step": 359 + }, + { + "epoch": 0.69, + "learning_rate": 0.0003693857976824721, + "loss": 0.4653, + "step": 360 + }, + { + "epoch": 0.7, + "learning_rate": 0.00036915611052569785, + "loss": 0.469, + "step": 361 + }, + { + "epoch": 0.7, + "learning_rate": 0.0003689256369242702, + "loss": 0.5618, + "step": 362 + }, + { + "epoch": 0.7, + "learning_rate": 0.0003686943779497124, + "loss": 0.4459, + "step": 363 + }, + { + "epoch": 0.7, + "learning_rate": 0.0003684623346771995, + "loss": 0.5606, + "step": 364 + }, + { + "epoch": 0.7, + "learning_rate": 0.0003682295081855524, + "loss": 0.4368, + "step": 365 + }, + { + "epoch": 0.7, + "learning_rate": 0.00036799589955723375, + "loss": 0.4168, + "step": 366 + }, + { + "epoch": 0.71, + "learning_rate": 0.00036776150987834243, + "loss": 0.4664, + "step": 367 + }, + { + "epoch": 0.71, + "learning_rate": 0.00036752634023860846, + "loss": 0.4737, + "step": 368 + }, + { + "epoch": 0.71, + "learning_rate": 0.0003672903917313883, + "loss": 0.4247, + "step": 369 + }, + { + "epoch": 0.71, + "learning_rate": 0.00036705366545365935, + "loss": 0.5677, + "step": 370 + }, + { + "epoch": 0.71, + "learning_rate": 0.00036681616250601505, + "loss": 0.5441, + "step": 371 + }, + { + "epoch": 0.72, + "learning_rate": 0.0003665778839926599, + "loss": 0.6247, + "step": 372 + }, + { + "epoch": 0.72, + "learning_rate": 0.00036633883102140405, + "loss": 0.5217, + "step": 373 + }, + { + "epoch": 0.72, + "learning_rate": 0.0003660990047036584, + "loss": 0.4651, + "step": 374 + }, + { + "epoch": 0.72, + "learning_rate": 0.0003658584061544291, + "loss": 0.4648, + "step": 375 + }, + { + "epoch": 0.72, + "learning_rate": 0.0003656170364923128, + "loss": 0.6048, + "step": 376 + }, + { + "epoch": 0.73, + "learning_rate": 0.00036537489683949114, + "loss": 0.4515, + "step": 377 + }, + { + "epoch": 0.73, + "learning_rate": 0.0003651319883217255, + "loss": 0.5096, + "step": 378 + }, + { + "epoch": 0.73, + "learning_rate": 0.00036488831206835207, + "loss": 0.4231, + "step": 379 + }, + { + "epoch": 0.73, + "learning_rate": 0.00036464386921227637, + "loss": 0.4903, + "step": 380 + }, + { + "epoch": 0.73, + "learning_rate": 0.00036439866088996796, + "loss": 0.5131, + "step": 381 + }, + { + "epoch": 0.74, + "learning_rate": 0.0003641526882414553, + "loss": 0.5986, + "step": 382 + }, + { + "epoch": 0.74, + "learning_rate": 0.0003639059524103203, + "loss": 0.6, + "step": 383 + }, + { + "epoch": 0.74, + "learning_rate": 0.0003636584545436931, + "loss": 0.5216, + "step": 384 + }, + { + "epoch": 0.74, + "learning_rate": 0.0003634101957922468, + "loss": 0.5144, + "step": 385 + }, + { + "epoch": 0.74, + "learning_rate": 0.00036316117731019184, + "loss": 0.4963, + "step": 386 + }, + { + "epoch": 0.75, + "learning_rate": 0.0003629114002552711, + "loss": 0.5657, + "step": 387 + }, + { + "epoch": 0.75, + "learning_rate": 0.00036266086578875384, + "loss": 0.5028, + "step": 388 + }, + { + "epoch": 0.75, + "learning_rate": 0.0003624095750754311, + "loss": 0.573, + "step": 389 + }, + { + "epoch": 0.75, + "learning_rate": 0.00036215752928360967, + "loss": 0.5199, + "step": 390 + }, + { + "epoch": 0.75, + "learning_rate": 0.0003619047295851068, + "loss": 0.656, + "step": 391 + }, + { + "epoch": 0.75, + "learning_rate": 0.00036165117715524506, + "loss": 0.5129, + "step": 392 + }, + { + "epoch": 0.76, + "learning_rate": 0.00036139687317284647, + "loss": 0.3945, + "step": 393 + }, + { + "epoch": 0.76, + "learning_rate": 0.0003611418188202271, + "loss": 0.5318, + "step": 394 + }, + { + "epoch": 0.76, + "learning_rate": 0.00036088601528319196, + "loss": 0.5344, + "step": 395 + }, + { + "epoch": 0.76, + "learning_rate": 0.00036062946375102885, + "loss": 0.5407, + "step": 396 + }, + { + "epoch": 0.76, + "learning_rate": 0.0003603721654165034, + "loss": 0.5364, + "step": 397 + }, + { + "epoch": 0.77, + "learning_rate": 0.00036011412147585306, + "loss": 0.5407, + "step": 398 + }, + { + "epoch": 0.77, + "learning_rate": 0.0003598553331287821, + "loss": 0.5999, + "step": 399 + }, + { + "epoch": 0.77, + "learning_rate": 0.0003595958015784555, + "loss": 0.624, + "step": 400 + }, + { + "epoch": 0.77, + "learning_rate": 0.00035933552803149354, + "loss": 0.5351, + "step": 401 + }, + { + "epoch": 0.77, + "learning_rate": 0.0003590745136979662, + "loss": 0.5196, + "step": 402 + }, + { + "epoch": 0.78, + "learning_rate": 0.00035881275979138765, + "loss": 0.5447, + "step": 403 + }, + { + "epoch": 0.78, + "learning_rate": 0.0003585502675287104, + "loss": 0.4908, + "step": 404 + }, + { + "epoch": 0.78, + "learning_rate": 0.00035828703813031986, + "loss": 0.5172, + "step": 405 + }, + { + "epoch": 0.78, + "learning_rate": 0.00035802307282002834, + "loss": 0.5923, + "step": 406 + }, + { + "epoch": 0.78, + "learning_rate": 0.0003577583728250699, + "loss": 0.568, + "step": 407 + }, + { + "epoch": 0.79, + "learning_rate": 0.00035749293937609395, + "loss": 0.4618, + "step": 408 + }, + { + "epoch": 0.79, + "learning_rate": 0.0003572267737071601, + "loss": 0.5351, + "step": 409 + }, + { + "epoch": 0.79, + "learning_rate": 0.0003569598770557322, + "loss": 0.5285, + "step": 410 + }, + { + "epoch": 0.79, + "learning_rate": 0.00035669225066267256, + "loss": 0.4571, + "step": 411 + }, + { + "epoch": 0.79, + "learning_rate": 0.00035642389577223625, + "loss": 0.4214, + "step": 412 + }, + { + "epoch": 0.8, + "learning_rate": 0.0003561548136320653, + "loss": 0.5393, + "step": 413 + }, + { + "epoch": 0.8, + "learning_rate": 0.0003558850054931828, + "loss": 0.549, + "step": 414 + }, + { + "epoch": 0.8, + "learning_rate": 0.00035561447260998714, + "loss": 0.4824, + "step": 415 + }, + { + "epoch": 0.8, + "learning_rate": 0.00035534321624024656, + "loss": 0.6244, + "step": 416 + }, + { + "epoch": 0.8, + "learning_rate": 0.00035507123764509245, + "loss": 0.5436, + "step": 417 + }, + { + "epoch": 0.8, + "learning_rate": 0.0003547985380890144, + "loss": 0.5198, + "step": 418 + }, + { + "epoch": 0.81, + "learning_rate": 0.00035452511883985366, + "loss": 0.5979, + "step": 419 + }, + { + "epoch": 0.81, + "learning_rate": 0.00035425098116879754, + "loss": 0.4158, + "step": 420 + }, + { + "epoch": 0.81, + "learning_rate": 0.00035397612635037356, + "loss": 0.5125, + "step": 421 + }, + { + "epoch": 0.81, + "learning_rate": 0.00035370055566244334, + "loss": 0.4699, + "step": 422 + }, + { + "epoch": 0.81, + "learning_rate": 0.0003534242703861966, + "loss": 0.5553, + "step": 423 + }, + { + "epoch": 0.82, + "learning_rate": 0.00035314727180614573, + "loss": 0.5969, + "step": 424 + }, + { + "epoch": 0.82, + "learning_rate": 0.00035286956121011897, + "loss": 0.456, + "step": 425 + }, + { + "epoch": 0.82, + "learning_rate": 0.0003525911398892552, + "loss": 0.5195, + "step": 426 + }, + { + "epoch": 0.82, + "learning_rate": 0.0003523120091379975, + "loss": 0.5187, + "step": 427 + }, + { + "epoch": 0.82, + "learning_rate": 0.00035203217025408726, + "loss": 0.5443, + "step": 428 + }, + { + "epoch": 0.83, + "learning_rate": 0.0003517516245385582, + "loss": 0.4476, + "step": 429 + }, + { + "epoch": 0.83, + "learning_rate": 0.0003514703732957301, + "loss": 0.5757, + "step": 430 + }, + { + "epoch": 0.83, + "learning_rate": 0.00035118841783320304, + "loss": 0.5129, + "step": 431 + }, + { + "epoch": 0.83, + "learning_rate": 0.00035090575946185114, + "loss": 0.6354, + "step": 432 + }, + { + "epoch": 0.83, + "learning_rate": 0.00035062239949581645, + "loss": 0.4065, + "step": 433 + }, + { + "epoch": 0.84, + "learning_rate": 0.000350338339252503, + "loss": 0.5472, + "step": 434 + }, + { + "epoch": 0.84, + "learning_rate": 0.00035005358005257045, + "loss": 0.5424, + "step": 435 + }, + { + "epoch": 0.84, + "learning_rate": 0.00034976812321992816, + "loss": 0.6127, + "step": 436 + }, + { + "epoch": 0.84, + "learning_rate": 0.00034948197008172877, + "loss": 0.63, + "step": 437 + }, + { + "epoch": 0.84, + "learning_rate": 0.0003491951219683625, + "loss": 0.413, + "step": 438 + }, + { + "epoch": 0.85, + "learning_rate": 0.00034890758021345034, + "loss": 0.5435, + "step": 439 + }, + { + "epoch": 0.85, + "learning_rate": 0.00034861934615383844, + "loss": 0.5433, + "step": 440 + }, + { + "epoch": 0.85, + "learning_rate": 0.00034833042112959153, + "loss": 0.4763, + "step": 441 + }, + { + "epoch": 0.85, + "learning_rate": 0.00034804080648398667, + "loss": 0.5727, + "step": 442 + }, + { + "epoch": 0.85, + "learning_rate": 0.00034775050356350727, + "loss": 0.5392, + "step": 443 + }, + { + "epoch": 0.85, + "learning_rate": 0.00034745951371783666, + "loss": 0.4981, + "step": 444 + }, + { + "epoch": 0.86, + "learning_rate": 0.0003471678382998518, + "loss": 0.5516, + "step": 445 + }, + { + "epoch": 0.86, + "learning_rate": 0.00034687547866561703, + "loss": 0.4965, + "step": 446 + }, + { + "epoch": 0.86, + "learning_rate": 0.0003465824361743779, + "loss": 0.4982, + "step": 447 + }, + { + "epoch": 0.86, + "learning_rate": 0.0003462887121885544, + "loss": 0.5619, + "step": 448 + }, + { + "epoch": 0.86, + "learning_rate": 0.0003459943080737353, + "loss": 0.5273, + "step": 449 + }, + { + "epoch": 0.87, + "learning_rate": 0.00034569922519867133, + "loss": 0.517, + "step": 450 + }, + { + "epoch": 0.87, + "learning_rate": 0.00034540346493526876, + "loss": 0.4874, + "step": 451 + }, + { + "epoch": 0.87, + "learning_rate": 0.0003451070286585833, + "loss": 0.5966, + "step": 452 + }, + { + "epoch": 0.87, + "learning_rate": 0.0003448099177468137, + "loss": 0.4487, + "step": 453 + }, + { + "epoch": 0.87, + "learning_rate": 0.0003445121335812951, + "loss": 0.5091, + "step": 454 + }, + { + "epoch": 0.88, + "learning_rate": 0.0003442136775464929, + "loss": 0.407, + "step": 455 + }, + { + "epoch": 0.88, + "learning_rate": 0.0003439145510299958, + "loss": 0.6327, + "step": 456 + }, + { + "epoch": 0.88, + "learning_rate": 0.00034361475542251025, + "loss": 0.4217, + "step": 457 + }, + { + "epoch": 0.88, + "learning_rate": 0.0003433142921178531, + "loss": 0.6102, + "step": 458 + }, + { + "epoch": 0.88, + "learning_rate": 0.0003430131625129456, + "loss": 0.5556, + "step": 459 + }, + { + "epoch": 0.89, + "learning_rate": 0.00034271136800780673, + "loss": 0.4986, + "step": 460 + }, + { + "epoch": 0.89, + "learning_rate": 0.0003424089100055467, + "loss": 0.5406, + "step": 461 + }, + { + "epoch": 0.89, + "learning_rate": 0.00034210578991236056, + "loss": 0.5881, + "step": 462 + }, + { + "epoch": 0.89, + "learning_rate": 0.00034180200913752157, + "loss": 0.4869, + "step": 463 + }, + { + "epoch": 0.89, + "learning_rate": 0.00034149756909337454, + "loss": 0.5626, + "step": 464 + }, + { + "epoch": 0.9, + "learning_rate": 0.0003411924711953295, + "loss": 0.564, + "step": 465 + }, + { + "epoch": 0.9, + "learning_rate": 0.00034088671686185486, + "loss": 0.6272, + "step": 466 + }, + { + "epoch": 0.9, + "learning_rate": 0.0003405803075144711, + "loss": 0.4643, + "step": 467 + }, + { + "epoch": 0.9, + "learning_rate": 0.0003402732445777438, + "loss": 0.5435, + "step": 468 + }, + { + "epoch": 0.9, + "learning_rate": 0.00033996552947927744, + "loss": 0.5844, + "step": 469 + }, + { + "epoch": 0.91, + "learning_rate": 0.0003396571636497084, + "loss": 0.5362, + "step": 470 + }, + { + "epoch": 0.91, + "learning_rate": 0.00033934814852269865, + "loss": 0.5607, + "step": 471 + }, + { + "epoch": 0.91, + "learning_rate": 0.0003390384855349285, + "loss": 0.4836, + "step": 472 + }, + { + "epoch": 0.91, + "learning_rate": 0.00033872817612609065, + "loss": 0.6555, + "step": 473 + }, + { + "epoch": 0.91, + "learning_rate": 0.00033841722173888315, + "loss": 0.4784, + "step": 474 + }, + { + "epoch": 0.91, + "learning_rate": 0.00033810562381900253, + "loss": 0.5583, + "step": 475 + }, + { + "epoch": 0.92, + "learning_rate": 0.00033779338381513736, + "loss": 0.4679, + "step": 476 + }, + { + "epoch": 0.92, + "learning_rate": 0.0003374805031789613, + "loss": 0.5325, + "step": 477 + }, + { + "epoch": 0.92, + "learning_rate": 0.00033716698336512654, + "loss": 0.6601, + "step": 478 + }, + { + "epoch": 0.92, + "learning_rate": 0.000336852825831257, + "loss": 0.4838, + "step": 479 + }, + { + "epoch": 0.92, + "learning_rate": 0.0003365380320379414, + "loss": 0.5588, + "step": 480 + }, + { + "epoch": 0.93, + "learning_rate": 0.00033622260344872665, + "loss": 0.4596, + "step": 481 + }, + { + "epoch": 0.93, + "learning_rate": 0.0003359065415301108, + "loss": 0.5228, + "step": 482 + }, + { + "epoch": 0.93, + "learning_rate": 0.00033558984775153663, + "loss": 0.5316, + "step": 483 + }, + { + "epoch": 0.93, + "learning_rate": 0.00033527252358538437, + "loss": 0.4761, + "step": 484 + }, + { + "epoch": 0.93, + "learning_rate": 0.0003349545705069653, + "loss": 0.5254, + "step": 485 + }, + { + "epoch": 0.94, + "learning_rate": 0.0003346359899945144, + "loss": 0.4786, + "step": 486 + }, + { + "epoch": 0.94, + "learning_rate": 0.00033431678352918384, + "loss": 0.4302, + "step": 487 + }, + { + "epoch": 0.94, + "learning_rate": 0.0003339969525950361, + "loss": 0.4914, + "step": 488 + }, + { + "epoch": 0.94, + "learning_rate": 0.00033367649867903663, + "loss": 0.4102, + "step": 489 + }, + { + "epoch": 0.94, + "learning_rate": 0.0003333554232710477, + "loss": 0.4698, + "step": 490 + }, + { + "epoch": 0.95, + "learning_rate": 0.0003330337278638207, + "loss": 0.4454, + "step": 491 + }, + { + "epoch": 0.95, + "learning_rate": 0.00033271141395298964, + "loss": 0.4648, + "step": 492 + }, + { + "epoch": 0.95, + "learning_rate": 0.00033238848303706415, + "loss": 0.4616, + "step": 493 + }, + { + "epoch": 0.95, + "learning_rate": 0.00033206493661742237, + "loss": 0.4861, + "step": 494 + }, + { + "epoch": 0.95, + "learning_rate": 0.00033174077619830416, + "loss": 0.4797, + "step": 495 + }, + { + "epoch": 0.96, + "learning_rate": 0.00033141600328680373, + "loss": 0.5104, + "step": 496 + }, + { + "epoch": 0.96, + "learning_rate": 0.00033109061939286336, + "loss": 0.5712, + "step": 497 + }, + { + "epoch": 0.96, + "learning_rate": 0.00033076462602926553, + "loss": 0.5425, + "step": 498 + }, + { + "epoch": 0.96, + "learning_rate": 0.00033043802471162636, + "loss": 0.6156, + "step": 499 + }, + { + "epoch": 0.96, + "learning_rate": 0.0003301108169583887, + "loss": 0.4282, + "step": 500 + }, + { + "epoch": 0.96, + "learning_rate": 0.0003297830042908146, + "loss": 0.4088, + "step": 501 + }, + { + "epoch": 0.97, + "learning_rate": 0.00032945458823297857, + "loss": 0.4866, + "step": 502 + }, + { + "epoch": 0.97, + "learning_rate": 0.0003291255703117605, + "loss": 0.5045, + "step": 503 + }, + { + "epoch": 0.97, + "learning_rate": 0.0003287959520568384, + "loss": 0.491, + "step": 504 + }, + { + "epoch": 0.97, + "learning_rate": 0.00032846573500068136, + "loss": 0.458, + "step": 505 + }, + { + "epoch": 0.97, + "learning_rate": 0.00032813492067854246, + "loss": 0.4508, + "step": 506 + }, + { + "epoch": 0.98, + "learning_rate": 0.0003278035106284516, + "loss": 0.4294, + "step": 507 + }, + { + "epoch": 0.98, + "learning_rate": 0.00032747150639120834, + "loss": 0.4834, + "step": 508 + }, + { + "epoch": 0.98, + "learning_rate": 0.00032713890951037477, + "loss": 0.3857, + "step": 509 + }, + { + "epoch": 0.98, + "learning_rate": 0.00032680572153226834, + "loss": 0.4072, + "step": 510 + }, + { + "epoch": 0.98, + "learning_rate": 0.0003264719440059545, + "loss": 0.4028, + "step": 511 + }, + { + "epoch": 0.99, + "learning_rate": 0.00032613757848323977, + "loss": 0.3789, + "step": 512 + }, + { + "epoch": 0.99, + "learning_rate": 0.00032580262651866446, + "loss": 0.4944, + "step": 513 + }, + { + "epoch": 0.99, + "learning_rate": 0.0003254670896694952, + "loss": 0.4259, + "step": 514 + }, + { + "epoch": 0.99, + "learning_rate": 0.00032513096949571805, + "loss": 0.5037, + "step": 515 + }, + { + "epoch": 0.99, + "learning_rate": 0.00032479426756003093, + "loss": 0.5857, + "step": 516 + }, + { + "epoch": 1.0, + "learning_rate": 0.0003244569854278366, + "loss": 0.5407, + "step": 517 + }, + { + "epoch": 1.0, + "learning_rate": 0.00032411912466723524, + "loss": 0.499, + "step": 518 + }, + { + "epoch": 1.0, + "learning_rate": 0.0003237806868490172, + "loss": 0.4359, + "step": 519 + }, + { + "epoch": 1.0, + "learning_rate": 0.00032344167354665573, + "loss": 0.4374, + "step": 520 + }, + { + "epoch": 1.0, + "learning_rate": 0.0003231020863362997, + "loss": 0.4172, + "step": 521 + }, + { + "epoch": 1.01, + "learning_rate": 0.000322761926796766, + "loss": 0.4451, + "step": 522 + }, + { + "epoch": 1.01, + "learning_rate": 0.0003224211965095326, + "loss": 0.4, + "step": 523 + }, + { + "epoch": 1.01, + "learning_rate": 0.0003220798970587309, + "loss": 0.4009, + "step": 524 + }, + { + "epoch": 1.01, + "learning_rate": 0.0003217380300311386, + "loss": 0.3966, + "step": 525 + }, + { + "epoch": 1.01, + "learning_rate": 0.000321395597016172, + "loss": 0.4255, + "step": 526 + }, + { + "epoch": 1.01, + "learning_rate": 0.00032105259960587895, + "loss": 0.4707, + "step": 527 + }, + { + "epoch": 1.02, + "learning_rate": 0.00032070903939493124, + "loss": 0.5313, + "step": 528 + }, + { + "epoch": 1.02, + "learning_rate": 0.0003203649179806172, + "loss": 0.3596, + "step": 529 + }, + { + "epoch": 1.02, + "learning_rate": 0.0003200202369628345, + "loss": 0.5223, + "step": 530 + }, + { + "epoch": 1.02, + "learning_rate": 0.00031967499794408234, + "loss": 0.4146, + "step": 531 + }, + { + "epoch": 1.02, + "learning_rate": 0.00031932920252945423, + "loss": 0.4328, + "step": 532 + }, + { + "epoch": 1.03, + "learning_rate": 0.0003189828523266306, + "loss": 0.4258, + "step": 533 + }, + { + "epoch": 1.03, + "learning_rate": 0.00031863594894587105, + "loss": 0.4457, + "step": 534 + }, + { + "epoch": 1.03, + "learning_rate": 0.0003182884940000072, + "loss": 0.5249, + "step": 535 + }, + { + "epoch": 1.03, + "learning_rate": 0.0003179404891044348, + "loss": 0.4751, + "step": 536 + }, + { + "epoch": 1.03, + "learning_rate": 0.00031759193587710676, + "loss": 0.5378, + "step": 537 + }, + { + "epoch": 1.04, + "learning_rate": 0.00031724283593852497, + "loss": 0.634, + "step": 538 + }, + { + "epoch": 1.04, + "learning_rate": 0.00031689319091173326, + "loss": 0.4298, + "step": 539 + }, + { + "epoch": 1.04, + "learning_rate": 0.00031654300242230977, + "loss": 0.5469, + "step": 540 + }, + { + "epoch": 1.04, + "learning_rate": 0.00031619227209835917, + "loss": 0.5153, + "step": 541 + }, + { + "epoch": 1.04, + "learning_rate": 0.0003158410015705053, + "loss": 0.4144, + "step": 542 + }, + { + "epoch": 1.05, + "learning_rate": 0.0003154891924718837, + "loss": 0.6041, + "step": 543 + }, + { + "epoch": 1.05, + "learning_rate": 0.0003151368464381335, + "loss": 0.4891, + "step": 544 + }, + { + "epoch": 1.05, + "learning_rate": 0.0003147839651073904, + "loss": 0.5258, + "step": 545 + }, + { + "epoch": 1.05, + "learning_rate": 0.00031443055012027874, + "loss": 0.4351, + "step": 546 + }, + { + "epoch": 1.05, + "learning_rate": 0.000314076603119904, + "loss": 0.4556, + "step": 547 + }, + { + "epoch": 1.06, + "learning_rate": 0.00031372212575184514, + "loss": 0.5445, + "step": 548 + }, + { + "epoch": 1.06, + "learning_rate": 0.00031336711966414675, + "loss": 0.5585, + "step": 549 + }, + { + "epoch": 1.06, + "learning_rate": 0.0003130115865073117, + "loss": 0.367, + "step": 550 + }, + { + "epoch": 1.06, + "learning_rate": 0.0003126555279342933, + "loss": 0.4877, + "step": 551 + }, + { + "epoch": 1.06, + "learning_rate": 0.0003122989456004876, + "loss": 0.4335, + "step": 552 + }, + { + "epoch": 1.07, + "learning_rate": 0.0003119418411637258, + "loss": 0.4383, + "step": 553 + }, + { + "epoch": 1.07, + "learning_rate": 0.0003115842162842663, + "loss": 0.4508, + "step": 554 + }, + { + "epoch": 1.07, + "learning_rate": 0.00031122607262478743, + "loss": 0.4631, + "step": 555 + }, + { + "epoch": 1.07, + "learning_rate": 0.0003108674118503793, + "loss": 0.3496, + "step": 556 + }, + { + "epoch": 1.07, + "learning_rate": 0.0003105082356285361, + "loss": 0.4108, + "step": 557 + }, + { + "epoch": 1.08, + "learning_rate": 0.0003101485456291486, + "loss": 0.4877, + "step": 558 + }, + { + "epoch": 1.08, + "learning_rate": 0.00030978834352449614, + "loss": 0.3696, + "step": 559 + }, + { + "epoch": 1.08, + "learning_rate": 0.00030942763098923913, + "loss": 0.5138, + "step": 560 + }, + { + "epoch": 1.08, + "learning_rate": 0.00030906640970041084, + "loss": 0.5961, + "step": 561 + }, + { + "epoch": 1.08, + "learning_rate": 0.0003087046813374099, + "loss": 0.3824, + "step": 562 + }, + { + "epoch": 1.08, + "learning_rate": 0.00030834244758199276, + "loss": 0.4925, + "step": 563 + }, + { + "epoch": 1.09, + "learning_rate": 0.000307979710118265, + "loss": 0.4511, + "step": 564 + }, + { + "epoch": 1.09, + "learning_rate": 0.00030761647063267457, + "loss": 0.4306, + "step": 565 + }, + { + "epoch": 1.09, + "learning_rate": 0.0003072527308140031, + "loss": 0.468, + "step": 566 + }, + { + "epoch": 1.09, + "learning_rate": 0.00030688849235335856, + "loss": 0.4842, + "step": 567 + }, + { + "epoch": 1.09, + "learning_rate": 0.0003065237569441671, + "loss": 0.4332, + "step": 568 + }, + { + "epoch": 1.1, + "learning_rate": 0.00030615852628216537, + "loss": 0.4637, + "step": 569 + }, + { + "epoch": 1.1, + "learning_rate": 0.0003057928020653925, + "loss": 0.6193, + "step": 570 + }, + { + "epoch": 1.1, + "learning_rate": 0.0003054265859941824, + "loss": 0.5033, + "step": 571 + }, + { + "epoch": 1.1, + "learning_rate": 0.00030505987977115555, + "loss": 0.4185, + "step": 572 + }, + { + "epoch": 1.1, + "learning_rate": 0.0003046926851012114, + "loss": 0.4211, + "step": 573 + }, + { + "epoch": 1.11, + "learning_rate": 0.0003043250036915201, + "loss": 0.5089, + "step": 574 + }, + { + "epoch": 1.11, + "learning_rate": 0.00030395683725151505, + "loss": 0.517, + "step": 575 + }, + { + "epoch": 1.11, + "learning_rate": 0.0003035881874928845, + "loss": 0.492, + "step": 576 + }, + { + "epoch": 1.11, + "learning_rate": 0.0003032190561295636, + "loss": 0.4535, + "step": 577 + }, + { + "epoch": 1.11, + "learning_rate": 0.0003028494448777269, + "loss": 0.3947, + "step": 578 + }, + { + "epoch": 1.12, + "learning_rate": 0.00030247935545577986, + "loss": 0.3125, + "step": 579 + }, + { + "epoch": 1.12, + "learning_rate": 0.0003021087895843511, + "loss": 0.3882, + "step": 580 + }, + { + "epoch": 1.12, + "learning_rate": 0.0003017377489862845, + "loss": 0.4802, + "step": 581 + }, + { + "epoch": 1.12, + "learning_rate": 0.00030136623538663083, + "loss": 0.4652, + "step": 582 + }, + { + "epoch": 1.12, + "learning_rate": 0.00030099425051263994, + "loss": 0.3816, + "step": 583 + }, + { + "epoch": 1.13, + "learning_rate": 0.0003006217960937529, + "loss": 0.4583, + "step": 584 + }, + { + "epoch": 1.13, + "learning_rate": 0.00030024887386159385, + "loss": 0.4568, + "step": 585 + }, + { + "epoch": 1.13, + "learning_rate": 0.00029987548554996174, + "loss": 0.3908, + "step": 586 + }, + { + "epoch": 1.13, + "learning_rate": 0.0002995016328948225, + "loss": 0.4235, + "step": 587 + }, + { + "epoch": 1.13, + "learning_rate": 0.00029912731763430075, + "loss": 0.4138, + "step": 588 + }, + { + "epoch": 1.13, + "learning_rate": 0.00029875254150867216, + "loss": 0.5838, + "step": 589 + }, + { + "epoch": 1.14, + "learning_rate": 0.0002983773062603548, + "loss": 0.462, + "step": 590 + }, + { + "epoch": 1.14, + "learning_rate": 0.00029800161363390145, + "loss": 0.4632, + "step": 591 + }, + { + "epoch": 1.14, + "learning_rate": 0.00029762546537599125, + "loss": 0.5898, + "step": 592 + }, + { + "epoch": 1.14, + "learning_rate": 0.0002972488632354218, + "loss": 0.4742, + "step": 593 + }, + { + "epoch": 1.14, + "learning_rate": 0.00029687180896310065, + "loss": 0.4579, + "step": 594 + }, + { + "epoch": 1.15, + "learning_rate": 0.0002964943043120378, + "loss": 0.5514, + "step": 595 + }, + { + "epoch": 1.15, + "learning_rate": 0.00029611635103733675, + "loss": 0.4304, + "step": 596 + }, + { + "epoch": 1.15, + "learning_rate": 0.0002957379508961871, + "loss": 0.4383, + "step": 597 + }, + { + "epoch": 1.15, + "learning_rate": 0.00029535910564785584, + "loss": 0.5327, + "step": 598 + }, + { + "epoch": 1.15, + "learning_rate": 0.00029497981705367933, + "loss": 0.4781, + "step": 599 + }, + { + "epoch": 1.16, + "learning_rate": 0.00029460008687705525, + "loss": 0.4178, + "step": 600 + }, + { + "epoch": 1.16, + "learning_rate": 0.0002942199168834342, + "loss": 0.3987, + "step": 601 + }, + { + "epoch": 1.16, + "learning_rate": 0.00029383930884031183, + "loss": 0.3861, + "step": 602 + }, + { + "epoch": 1.16, + "learning_rate": 0.00029345826451722005, + "loss": 0.5322, + "step": 603 + }, + { + "epoch": 1.16, + "learning_rate": 0.00029307678568571936, + "loss": 0.3997, + "step": 604 + }, + { + "epoch": 1.17, + "learning_rate": 0.0002926948741193903, + "loss": 0.4121, + "step": 605 + }, + { + "epoch": 1.17, + "learning_rate": 0.00029231253159382514, + "loss": 0.4931, + "step": 606 + }, + { + "epoch": 1.17, + "learning_rate": 0.00029192975988662017, + "loss": 0.4626, + "step": 607 + }, + { + "epoch": 1.17, + "learning_rate": 0.00029154656077736666, + "loss": 0.4441, + "step": 608 + }, + { + "epoch": 1.17, + "learning_rate": 0.0002911629360476432, + "loss": 0.3863, + "step": 609 + }, + { + "epoch": 1.18, + "learning_rate": 0.00029077888748100703, + "loss": 0.36, + "step": 610 + }, + { + "epoch": 1.18, + "learning_rate": 0.00029039441686298594, + "loss": 0.4246, + "step": 611 + }, + { + "epoch": 1.18, + "learning_rate": 0.0002900095259810702, + "loss": 0.4916, + "step": 612 + }, + { + "epoch": 1.18, + "learning_rate": 0.00028962421662470346, + "loss": 0.4896, + "step": 613 + }, + { + "epoch": 1.18, + "learning_rate": 0.00028923849058527535, + "loss": 0.4237, + "step": 614 + }, + { + "epoch": 1.18, + "learning_rate": 0.00028885234965611274, + "loss": 0.5727, + "step": 615 + }, + { + "epoch": 1.19, + "learning_rate": 0.00028846579563247116, + "loss": 0.5681, + "step": 616 + }, + { + "epoch": 1.19, + "learning_rate": 0.0002880788303115269, + "loss": 0.4383, + "step": 617 + }, + { + "epoch": 1.19, + "learning_rate": 0.00028769145549236845, + "loss": 0.4962, + "step": 618 + }, + { + "epoch": 1.19, + "learning_rate": 0.0002873036729759881, + "loss": 0.5472, + "step": 619 + }, + { + "epoch": 1.19, + "learning_rate": 0.0002869154845652738, + "loss": 0.5431, + "step": 620 + }, + { + "epoch": 1.2, + "learning_rate": 0.0002865268920650003, + "loss": 0.4152, + "step": 621 + }, + { + "epoch": 1.2, + "learning_rate": 0.0002861378972818211, + "loss": 0.3922, + "step": 622 + }, + { + "epoch": 1.2, + "learning_rate": 0.0002857485020242602, + "loss": 0.5129, + "step": 623 + }, + { + "epoch": 1.2, + "learning_rate": 0.0002853587081027034, + "loss": 0.4328, + "step": 624 + }, + { + "epoch": 1.2, + "learning_rate": 0.00028496851732938997, + "loss": 0.4431, + "step": 625 + }, + { + "epoch": 1.21, + "learning_rate": 0.0002845779315184042, + "loss": 0.4968, + "step": 626 + }, + { + "epoch": 1.21, + "learning_rate": 0.000284186952485667, + "loss": 0.5301, + "step": 627 + }, + { + "epoch": 1.21, + "learning_rate": 0.0002837955820489276, + "loss": 0.4332, + "step": 628 + }, + { + "epoch": 1.21, + "learning_rate": 0.0002834038220277546, + "loss": 0.4245, + "step": 629 + }, + { + "epoch": 1.21, + "learning_rate": 0.00028301167424352836, + "loss": 0.5057, + "step": 630 + }, + { + "epoch": 1.22, + "learning_rate": 0.00028261914051943166, + "loss": 0.4623, + "step": 631 + }, + { + "epoch": 1.22, + "learning_rate": 0.00028222622268044174, + "loss": 0.5452, + "step": 632 + }, + { + "epoch": 1.22, + "learning_rate": 0.00028183292255332164, + "loss": 0.5238, + "step": 633 + }, + { + "epoch": 1.22, + "learning_rate": 0.00028143924196661176, + "loss": 0.3966, + "step": 634 + }, + { + "epoch": 1.22, + "learning_rate": 0.0002810451827506214, + "loss": 0.35, + "step": 635 + }, + { + "epoch": 1.23, + "learning_rate": 0.00028065074673742007, + "loss": 0.4325, + "step": 636 + }, + { + "epoch": 1.23, + "learning_rate": 0.0002802559357608292, + "loss": 0.4854, + "step": 637 + }, + { + "epoch": 1.23, + "learning_rate": 0.00027986075165641343, + "loss": 0.4254, + "step": 638 + }, + { + "epoch": 1.23, + "learning_rate": 0.00027946519626147225, + "loss": 0.4614, + "step": 639 + }, + { + "epoch": 1.23, + "learning_rate": 0.00027906927141503125, + "loss": 0.3798, + "step": 640 + }, + { + "epoch": 1.23, + "learning_rate": 0.00027867297895783373, + "loss": 0.4742, + "step": 641 + }, + { + "epoch": 1.24, + "learning_rate": 0.0002782763207323322, + "loss": 0.4007, + "step": 642 + }, + { + "epoch": 1.24, + "learning_rate": 0.0002778792985826795, + "loss": 0.4383, + "step": 643 + }, + { + "epoch": 1.24, + "learning_rate": 0.0002774819143547206, + "loss": 0.4298, + "step": 644 + }, + { + "epoch": 1.24, + "learning_rate": 0.00027708416989598387, + "loss": 0.5178, + "step": 645 + }, + { + "epoch": 1.24, + "learning_rate": 0.0002766860670556722, + "loss": 0.3434, + "step": 646 + }, + { + "epoch": 1.25, + "learning_rate": 0.0002762876076846551, + "loss": 0.3862, + "step": 647 + }, + { + "epoch": 1.25, + "learning_rate": 0.00027588879363545934, + "loss": 0.4459, + "step": 648 + }, + { + "epoch": 1.25, + "learning_rate": 0.0002754896267622608, + "loss": 0.3934, + "step": 649 + }, + { + "epoch": 1.25, + "learning_rate": 0.00027509010892087565, + "loss": 0.4349, + "step": 650 + } + ], + "logging_steps": 1, + "max_steps": 1557, + "num_train_epochs": 3, + "save_steps": 50, + "total_flos": 8.708683022784922e+17, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-650/training_args.bin b/checkpoint-650/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..cbdf20491848d40e9a89bca19c6229b4b2b55e5d --- /dev/null +++ b/checkpoint-650/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:07819caee47f45203545f962678d52b5954ff1fd4afe1d5152fad48004402099 +size 4155 diff --git a/checkpoint-700/README.md b/checkpoint-700/README.md new file mode 100644 index 0000000000000000000000000000000000000000..b32efe7366f05d1d90816d2ad9e4b06ccca46bea --- /dev/null +++ b/checkpoint-700/README.md @@ -0,0 +1,219 @@ +--- +library_name: peft +base_model: mistralai/Mistral-7B-v0.1 +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] + + +## Training procedure + + +The following `bitsandbytes` quantization config was used during training: +- quant_method: bitsandbytes +- load_in_8bit: False +- load_in_4bit: True +- llm_int8_threshold: 6.0 +- llm_int8_skip_modules: None +- llm_int8_enable_fp32_cpu_offload: False +- llm_int8_has_fp16_weight: False +- bnb_4bit_quant_type: nf4 +- bnb_4bit_use_double_quant: True +- bnb_4bit_compute_dtype: float16 + +### Framework versions + + +- PEFT 0.6.0.dev0 diff --git a/checkpoint-700/adapter_config.json b/checkpoint-700/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..4e108f2da037ef6250457c67a4bedd308d97303c --- /dev/null +++ b/checkpoint-700/adapter_config.json @@ -0,0 +1,24 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "mistralai/Mistral-7B-v0.1", + "bias": "none", + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layers_pattern": null, + "layers_to_transform": null, + "lora_alpha": 16, + "lora_dropout": 0.05, + "modules_to_save": null, + "peft_type": "LORA", + "r": 16, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "gate_proj", + "down_proj", + "up_proj" + ], + "task_type": "CAUSAL_LM" +} \ No newline at end of file diff --git a/checkpoint-700/adapter_model.bin b/checkpoint-700/adapter_model.bin new file mode 100644 index 0000000000000000000000000000000000000000..08eb8ab92eac85b0939c839125d2b4f48d21b12f --- /dev/null +++ b/checkpoint-700/adapter_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e05dcbf6a0eb6c227daf79f403826fb96f696f78fcae8997c0788ae630aaa6df +size 113314765 diff --git a/checkpoint-700/optimizer.pt b/checkpoint-700/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..a040176b68cd02a9f4c6cfce7960b00a349cd284 --- /dev/null +++ b/checkpoint-700/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1aaf4be3170088c5a633ea2b98b4000542a6eedfdbccc39f40e2a0aa8908411c +size 226653957 diff --git a/checkpoint-700/rng_state.pth b/checkpoint-700/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..f25b5c4bb8b1f73efa158f82884219b4cc6d7e4a --- /dev/null +++ b/checkpoint-700/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d644493925e965e20b39de9c3711211259f8396a94a62986c092fb2026af3821 +size 14575 diff --git a/checkpoint-700/scheduler.pt b/checkpoint-700/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..f50d708a9f9dbe99cc2ec182ae711957ff05d6d3 --- /dev/null +++ b/checkpoint-700/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:82fae75956dfc9e7cb641e5a21ef9ab176011d3b4ea29c8e3c08de7f701c6c44 +size 627 diff --git a/checkpoint-700/trainer_state.json b/checkpoint-700/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..c7e65333074953684fc1cedec6c9c9cdfe9d454b --- /dev/null +++ b/checkpoint-700/trainer_state.json @@ -0,0 +1,4219 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 1.3485377301721027, + "eval_steps": 500, + "global_step": 700, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0, + "learning_rate": 4.000000000000001e-06, + "loss": 0.6869, + "step": 1 + }, + { + "epoch": 0.0, + "learning_rate": 8.000000000000001e-06, + "loss": 0.8396, + "step": 2 + }, + { + "epoch": 0.01, + "learning_rate": 1.2e-05, + "loss": 0.7489, + "step": 3 + }, + { + "epoch": 0.01, + "learning_rate": 1.6000000000000003e-05, + "loss": 0.7252, + "step": 4 + }, + { + "epoch": 0.01, + "learning_rate": 2e-05, + "loss": 0.6548, + "step": 5 + }, + { + "epoch": 0.01, + "learning_rate": 2.4e-05, + "loss": 0.8022, + "step": 6 + }, + { + "epoch": 0.01, + "learning_rate": 2.8000000000000003e-05, + "loss": 0.6524, + "step": 7 + }, + { + "epoch": 0.02, + "learning_rate": 3.2000000000000005e-05, + "loss": 0.6981, + "step": 8 + }, + { + "epoch": 0.02, + "learning_rate": 3.6e-05, + "loss": 0.7488, + "step": 9 + }, + { + "epoch": 0.02, + "learning_rate": 4e-05, + "loss": 0.6368, + "step": 10 + }, + { + "epoch": 0.02, + "learning_rate": 4.4000000000000006e-05, + "loss": 0.6891, + "step": 11 + }, + { + "epoch": 0.02, + "learning_rate": 4.8e-05, + "loss": 0.7968, + "step": 12 + }, + { + "epoch": 0.03, + "learning_rate": 5.2000000000000004e-05, + "loss": 0.6912, + "step": 13 + }, + { + "epoch": 0.03, + "learning_rate": 5.6000000000000006e-05, + "loss": 0.8452, + "step": 14 + }, + { + "epoch": 0.03, + "learning_rate": 6e-05, + "loss": 0.6989, + "step": 15 + }, + { + "epoch": 0.03, + "learning_rate": 6.400000000000001e-05, + "loss": 0.6685, + "step": 16 + }, + { + "epoch": 0.03, + "learning_rate": 6.800000000000001e-05, + "loss": 0.5469, + "step": 17 + }, + { + "epoch": 0.03, + "learning_rate": 7.2e-05, + "loss": 0.7915, + "step": 18 + }, + { + "epoch": 0.04, + "learning_rate": 7.6e-05, + "loss": 0.7744, + "step": 19 + }, + { + "epoch": 0.04, + "learning_rate": 8e-05, + "loss": 0.6804, + "step": 20 + }, + { + "epoch": 0.04, + "learning_rate": 8.4e-05, + "loss": 0.7796, + "step": 21 + }, + { + "epoch": 0.04, + "learning_rate": 8.800000000000001e-05, + "loss": 0.706, + "step": 22 + }, + { + "epoch": 0.04, + "learning_rate": 9.200000000000001e-05, + "loss": 0.6798, + "step": 23 + }, + { + "epoch": 0.05, + "learning_rate": 9.6e-05, + "loss": 0.6333, + "step": 24 + }, + { + "epoch": 0.05, + "learning_rate": 0.0001, + "loss": 0.6012, + "step": 25 + }, + { + "epoch": 0.05, + "learning_rate": 0.00010400000000000001, + "loss": 0.52, + "step": 26 + }, + { + "epoch": 0.05, + "learning_rate": 0.00010800000000000001, + "loss": 0.6583, + "step": 27 + }, + { + "epoch": 0.05, + "learning_rate": 0.00011200000000000001, + "loss": 0.7354, + "step": 28 + }, + { + "epoch": 0.06, + "learning_rate": 0.000116, + "loss": 0.6296, + "step": 29 + }, + { + "epoch": 0.06, + "learning_rate": 0.00012, + "loss": 0.6352, + "step": 30 + }, + { + "epoch": 0.06, + "learning_rate": 0.000124, + "loss": 0.6007, + "step": 31 + }, + { + "epoch": 0.06, + "learning_rate": 0.00012800000000000002, + "loss": 0.5659, + "step": 32 + }, + { + "epoch": 0.06, + "learning_rate": 0.000132, + "loss": 0.5138, + "step": 33 + }, + { + "epoch": 0.07, + "learning_rate": 0.00013600000000000003, + "loss": 0.6639, + "step": 34 + }, + { + "epoch": 0.07, + "learning_rate": 0.00014, + "loss": 0.5934, + "step": 35 + }, + { + "epoch": 0.07, + "learning_rate": 0.000144, + "loss": 0.5233, + "step": 36 + }, + { + "epoch": 0.07, + "learning_rate": 0.000148, + "loss": 0.5307, + "step": 37 + }, + { + "epoch": 0.07, + "learning_rate": 0.000152, + "loss": 0.5928, + "step": 38 + }, + { + "epoch": 0.08, + "learning_rate": 0.00015600000000000002, + "loss": 0.5908, + "step": 39 + }, + { + "epoch": 0.08, + "learning_rate": 0.00016, + "loss": 0.6366, + "step": 40 + }, + { + "epoch": 0.08, + "learning_rate": 0.000164, + "loss": 0.5972, + "step": 41 + }, + { + "epoch": 0.08, + "learning_rate": 0.000168, + "loss": 0.4825, + "step": 42 + }, + { + "epoch": 0.08, + "learning_rate": 0.000172, + "loss": 0.6783, + "step": 43 + }, + { + "epoch": 0.08, + "learning_rate": 0.00017600000000000002, + "loss": 0.6082, + "step": 44 + }, + { + "epoch": 0.09, + "learning_rate": 0.00018, + "loss": 0.7633, + "step": 45 + }, + { + "epoch": 0.09, + "learning_rate": 0.00018400000000000003, + "loss": 0.5988, + "step": 46 + }, + { + "epoch": 0.09, + "learning_rate": 0.000188, + "loss": 0.6658, + "step": 47 + }, + { + "epoch": 0.09, + "learning_rate": 0.000192, + "loss": 0.5945, + "step": 48 + }, + { + "epoch": 0.09, + "learning_rate": 0.000196, + "loss": 0.5984, + "step": 49 + }, + { + "epoch": 0.1, + "learning_rate": 0.0002, + "loss": 0.6778, + "step": 50 + }, + { + "epoch": 0.1, + "learning_rate": 0.00020400000000000003, + "loss": 0.6057, + "step": 51 + }, + { + "epoch": 0.1, + "learning_rate": 0.00020800000000000001, + "loss": 0.601, + "step": 52 + }, + { + "epoch": 0.1, + "learning_rate": 0.00021200000000000003, + "loss": 0.5566, + "step": 53 + }, + { + "epoch": 0.1, + "learning_rate": 0.00021600000000000002, + "loss": 0.5911, + "step": 54 + }, + { + "epoch": 0.11, + "learning_rate": 0.00022000000000000003, + "loss": 0.7636, + "step": 55 + }, + { + "epoch": 0.11, + "learning_rate": 0.00022400000000000002, + "loss": 0.5537, + "step": 56 + }, + { + "epoch": 0.11, + "learning_rate": 0.00022799999999999999, + "loss": 0.6037, + "step": 57 + }, + { + "epoch": 0.11, + "learning_rate": 0.000232, + "loss": 0.6474, + "step": 58 + }, + { + "epoch": 0.11, + "learning_rate": 0.000236, + "loss": 0.6483, + "step": 59 + }, + { + "epoch": 0.12, + "learning_rate": 0.00024, + "loss": 0.5021, + "step": 60 + }, + { + "epoch": 0.12, + "learning_rate": 0.000244, + "loss": 0.5347, + "step": 61 + }, + { + "epoch": 0.12, + "learning_rate": 0.000248, + "loss": 0.5791, + "step": 62 + }, + { + "epoch": 0.12, + "learning_rate": 0.000252, + "loss": 0.5407, + "step": 63 + }, + { + "epoch": 0.12, + "learning_rate": 0.00025600000000000004, + "loss": 0.5298, + "step": 64 + }, + { + "epoch": 0.13, + "learning_rate": 0.00026000000000000003, + "loss": 0.5685, + "step": 65 + }, + { + "epoch": 0.13, + "learning_rate": 0.000264, + "loss": 0.5108, + "step": 66 + }, + { + "epoch": 0.13, + "learning_rate": 0.000268, + "loss": 0.526, + "step": 67 + }, + { + "epoch": 0.13, + "learning_rate": 0.00027200000000000005, + "loss": 0.6843, + "step": 68 + }, + { + "epoch": 0.13, + "learning_rate": 0.000276, + "loss": 0.6608, + "step": 69 + }, + { + "epoch": 0.13, + "learning_rate": 0.00028, + "loss": 0.5866, + "step": 70 + }, + { + "epoch": 0.14, + "learning_rate": 0.000284, + "loss": 0.6422, + "step": 71 + }, + { + "epoch": 0.14, + "learning_rate": 0.000288, + "loss": 0.449, + "step": 72 + }, + { + "epoch": 0.14, + "learning_rate": 0.000292, + "loss": 0.5319, + "step": 73 + }, + { + "epoch": 0.14, + "learning_rate": 0.000296, + "loss": 0.5977, + "step": 74 + }, + { + "epoch": 0.14, + "learning_rate": 0.00030000000000000003, + "loss": 0.5805, + "step": 75 + }, + { + "epoch": 0.15, + "learning_rate": 0.000304, + "loss": 0.5209, + "step": 76 + }, + { + "epoch": 0.15, + "learning_rate": 0.000308, + "loss": 0.6098, + "step": 77 + }, + { + "epoch": 0.15, + "learning_rate": 0.00031200000000000005, + "loss": 0.4665, + "step": 78 + }, + { + "epoch": 0.15, + "learning_rate": 0.00031600000000000004, + "loss": 0.6882, + "step": 79 + }, + { + "epoch": 0.15, + "learning_rate": 0.00032, + "loss": 0.5427, + "step": 80 + }, + { + "epoch": 0.16, + "learning_rate": 0.000324, + "loss": 0.5345, + "step": 81 + }, + { + "epoch": 0.16, + "learning_rate": 0.000328, + "loss": 0.663, + "step": 82 + }, + { + "epoch": 0.16, + "learning_rate": 0.000332, + "loss": 0.5393, + "step": 83 + }, + { + "epoch": 0.16, + "learning_rate": 0.000336, + "loss": 0.5711, + "step": 84 + }, + { + "epoch": 0.16, + "learning_rate": 0.00034, + "loss": 0.5261, + "step": 85 + }, + { + "epoch": 0.17, + "learning_rate": 0.000344, + "loss": 0.5775, + "step": 86 + }, + { + "epoch": 0.17, + "learning_rate": 0.000348, + "loss": 0.6329, + "step": 87 + }, + { + "epoch": 0.17, + "learning_rate": 0.00035200000000000005, + "loss": 0.4425, + "step": 88 + }, + { + "epoch": 0.17, + "learning_rate": 0.00035600000000000003, + "loss": 0.6837, + "step": 89 + }, + { + "epoch": 0.17, + "learning_rate": 0.00036, + "loss": 0.615, + "step": 90 + }, + { + "epoch": 0.18, + "learning_rate": 0.000364, + "loss": 0.5615, + "step": 91 + }, + { + "epoch": 0.18, + "learning_rate": 0.00036800000000000005, + "loss": 0.5434, + "step": 92 + }, + { + "epoch": 0.18, + "learning_rate": 0.00037200000000000004, + "loss": 0.5864, + "step": 93 + }, + { + "epoch": 0.18, + "learning_rate": 0.000376, + "loss": 0.5583, + "step": 94 + }, + { + "epoch": 0.18, + "learning_rate": 0.00038, + "loss": 0.5299, + "step": 95 + }, + { + "epoch": 0.18, + "learning_rate": 0.000384, + "loss": 0.532, + "step": 96 + }, + { + "epoch": 0.19, + "learning_rate": 0.000388, + "loss": 0.5227, + "step": 97 + }, + { + "epoch": 0.19, + "learning_rate": 0.000392, + "loss": 0.5275, + "step": 98 + }, + { + "epoch": 0.19, + "learning_rate": 0.00039600000000000003, + "loss": 0.4541, + "step": 99 + }, + { + "epoch": 0.19, + "learning_rate": 0.0004, + "loss": 0.6485, + "step": 100 + }, + { + "epoch": 0.19, + "learning_rate": 0.0003999995350775973, + "loss": 0.5438, + "step": 101 + }, + { + "epoch": 0.2, + "learning_rate": 0.00039999814031255063, + "loss": 0.5997, + "step": 102 + }, + { + "epoch": 0.2, + "learning_rate": 0.00039999581571134455, + "loss": 0.5322, + "step": 103 + }, + { + "epoch": 0.2, + "learning_rate": 0.0003999925612847867, + "loss": 0.484, + "step": 104 + }, + { + "epoch": 0.2, + "learning_rate": 0.00039998837704800766, + "loss": 0.5961, + "step": 105 + }, + { + "epoch": 0.2, + "learning_rate": 0.00039998326302046085, + "loss": 0.7405, + "step": 106 + }, + { + "epoch": 0.21, + "learning_rate": 0.00039997721922592255, + "loss": 0.5802, + "step": 107 + }, + { + "epoch": 0.21, + "learning_rate": 0.00039997024569249167, + "loss": 0.769, + "step": 108 + }, + { + "epoch": 0.21, + "learning_rate": 0.0003999623424525898, + "loss": 0.5598, + "step": 109 + }, + { + "epoch": 0.21, + "learning_rate": 0.0003999535095429608, + "loss": 0.6143, + "step": 110 + }, + { + "epoch": 0.21, + "learning_rate": 0.00039994374700467095, + "loss": 0.5766, + "step": 111 + }, + { + "epoch": 0.22, + "learning_rate": 0.00039993305488310836, + "loss": 0.7695, + "step": 112 + }, + { + "epoch": 0.22, + "learning_rate": 0.0003999214332279831, + "loss": 0.7153, + "step": 113 + }, + { + "epoch": 0.22, + "learning_rate": 0.0003999088820933269, + "loss": 0.5835, + "step": 114 + }, + { + "epoch": 0.22, + "learning_rate": 0.00039989540153749286, + "loss": 0.6634, + "step": 115 + }, + { + "epoch": 0.22, + "learning_rate": 0.000399880991623155, + "loss": 0.6069, + "step": 116 + }, + { + "epoch": 0.23, + "learning_rate": 0.0003998656524173082, + "loss": 0.7224, + "step": 117 + }, + { + "epoch": 0.23, + "learning_rate": 0.000399849383991268, + "loss": 0.5884, + "step": 118 + }, + { + "epoch": 0.23, + "learning_rate": 0.0003998321864206699, + "loss": 0.5122, + "step": 119 + }, + { + "epoch": 0.23, + "learning_rate": 0.00039981405978546924, + "loss": 0.6453, + "step": 120 + }, + { + "epoch": 0.23, + "learning_rate": 0.0003997950041699408, + "loss": 0.4665, + "step": 121 + }, + { + "epoch": 0.23, + "learning_rate": 0.0003997750196626785, + "loss": 0.5428, + "step": 122 + }, + { + "epoch": 0.24, + "learning_rate": 0.00039975410635659464, + "loss": 0.4365, + "step": 123 + }, + { + "epoch": 0.24, + "learning_rate": 0.00039973226434891995, + "loss": 0.5978, + "step": 124 + }, + { + "epoch": 0.24, + "learning_rate": 0.00039970949374120286, + "loss": 0.7729, + "step": 125 + }, + { + "epoch": 0.24, + "learning_rate": 0.000399685794639309, + "loss": 0.6212, + "step": 126 + }, + { + "epoch": 0.24, + "learning_rate": 0.00039966116715342066, + "loss": 0.5426, + "step": 127 + }, + { + "epoch": 0.25, + "learning_rate": 0.00039963561139803676, + "loss": 0.5782, + "step": 128 + }, + { + "epoch": 0.25, + "learning_rate": 0.0003996091274919716, + "loss": 0.6701, + "step": 129 + }, + { + "epoch": 0.25, + "learning_rate": 0.0003995817155583548, + "loss": 0.6314, + "step": 130 + }, + { + "epoch": 0.25, + "learning_rate": 0.0003995533757246307, + "loss": 0.6662, + "step": 131 + }, + { + "epoch": 0.25, + "learning_rate": 0.0003995241081225573, + "loss": 0.5192, + "step": 132 + }, + { + "epoch": 0.26, + "learning_rate": 0.0003994939128882065, + "loss": 0.5591, + "step": 133 + }, + { + "epoch": 0.26, + "learning_rate": 0.0003994627901619625, + "loss": 0.5809, + "step": 134 + }, + { + "epoch": 0.26, + "learning_rate": 0.0003994307400885219, + "loss": 0.4871, + "step": 135 + }, + { + "epoch": 0.26, + "learning_rate": 0.0003993977628168928, + "loss": 0.6666, + "step": 136 + }, + { + "epoch": 0.26, + "learning_rate": 0.0003993638585003938, + "loss": 0.6469, + "step": 137 + }, + { + "epoch": 0.27, + "learning_rate": 0.00039932902729665357, + "loss": 0.5727, + "step": 138 + }, + { + "epoch": 0.27, + "learning_rate": 0.00039929326936761036, + "loss": 0.6715, + "step": 139 + }, + { + "epoch": 0.27, + "learning_rate": 0.00039925658487951067, + "loss": 0.5686, + "step": 140 + }, + { + "epoch": 0.27, + "learning_rate": 0.00039921897400290894, + "loss": 0.501, + "step": 141 + }, + { + "epoch": 0.27, + "learning_rate": 0.00039918043691266665, + "loss": 0.5795, + "step": 142 + }, + { + "epoch": 0.28, + "learning_rate": 0.00039914097378795124, + "loss": 0.6287, + "step": 143 + }, + { + "epoch": 0.28, + "learning_rate": 0.00039910058481223564, + "loss": 0.7016, + "step": 144 + }, + { + "epoch": 0.28, + "learning_rate": 0.00039905927017329726, + "loss": 0.6232, + "step": 145 + }, + { + "epoch": 0.28, + "learning_rate": 0.00039901703006321715, + "loss": 0.5291, + "step": 146 + }, + { + "epoch": 0.28, + "learning_rate": 0.00039897386467837903, + "loss": 0.5297, + "step": 147 + }, + { + "epoch": 0.28, + "learning_rate": 0.00039892977421946844, + "loss": 0.5784, + "step": 148 + }, + { + "epoch": 0.29, + "learning_rate": 0.0003988847588914718, + "loss": 0.5714, + "step": 149 + }, + { + "epoch": 0.29, + "learning_rate": 0.0003988388189036754, + "loss": 0.5044, + "step": 150 + }, + { + "epoch": 0.29, + "learning_rate": 0.0003987919544696646, + "loss": 0.8246, + "step": 151 + }, + { + "epoch": 0.29, + "learning_rate": 0.0003987441658073226, + "loss": 0.5048, + "step": 152 + }, + { + "epoch": 0.29, + "learning_rate": 0.0003986954531388297, + "loss": 0.5433, + "step": 153 + }, + { + "epoch": 0.3, + "learning_rate": 0.00039864581669066186, + "loss": 0.5251, + "step": 154 + }, + { + "epoch": 0.3, + "learning_rate": 0.0003985952566935902, + "loss": 0.5708, + "step": 155 + }, + { + "epoch": 0.3, + "learning_rate": 0.00039854377338267936, + "loss": 0.6276, + "step": 156 + }, + { + "epoch": 0.3, + "learning_rate": 0.00039849136699728684, + "loss": 0.4915, + "step": 157 + }, + { + "epoch": 0.3, + "learning_rate": 0.0003984380377810617, + "loss": 0.6389, + "step": 158 + }, + { + "epoch": 0.31, + "learning_rate": 0.00039838378598194325, + "loss": 0.6067, + "step": 159 + }, + { + "epoch": 0.31, + "learning_rate": 0.00039832861185216045, + "loss": 0.6136, + "step": 160 + }, + { + "epoch": 0.31, + "learning_rate": 0.0003982725156482301, + "loss": 0.5597, + "step": 161 + }, + { + "epoch": 0.31, + "learning_rate": 0.000398215497630956, + "loss": 0.5957, + "step": 162 + }, + { + "epoch": 0.31, + "learning_rate": 0.0003981575580654278, + "loss": 0.5853, + "step": 163 + }, + { + "epoch": 0.32, + "learning_rate": 0.0003980986972210194, + "loss": 0.5462, + "step": 164 + }, + { + "epoch": 0.32, + "learning_rate": 0.0003980389153713881, + "loss": 0.5302, + "step": 165 + }, + { + "epoch": 0.32, + "learning_rate": 0.00039797821279447307, + "loss": 0.5395, + "step": 166 + }, + { + "epoch": 0.32, + "learning_rate": 0.00039791658977249425, + "loss": 0.7004, + "step": 167 + }, + { + "epoch": 0.32, + "learning_rate": 0.00039785404659195084, + "loss": 0.5622, + "step": 168 + }, + { + "epoch": 0.33, + "learning_rate": 0.00039779058354362013, + "loss": 0.5759, + "step": 169 + }, + { + "epoch": 0.33, + "learning_rate": 0.000397726200922556, + "loss": 0.6184, + "step": 170 + }, + { + "epoch": 0.33, + "learning_rate": 0.0003976608990280877, + "loss": 0.5488, + "step": 171 + }, + { + "epoch": 0.33, + "learning_rate": 0.0003975946781638183, + "loss": 0.6162, + "step": 172 + }, + { + "epoch": 0.33, + "learning_rate": 0.0003975275386376236, + "loss": 0.558, + "step": 173 + }, + { + "epoch": 0.34, + "learning_rate": 0.0003974594807616502, + "loss": 0.519, + "step": 174 + }, + { + "epoch": 0.34, + "learning_rate": 0.0003973905048523144, + "loss": 0.6195, + "step": 175 + }, + { + "epoch": 0.34, + "learning_rate": 0.00039732061123030064, + "loss": 0.5991, + "step": 176 + }, + { + "epoch": 0.34, + "learning_rate": 0.0003972498002205601, + "loss": 0.5428, + "step": 177 + }, + { + "epoch": 0.34, + "learning_rate": 0.00039717807215230896, + "loss": 0.5323, + "step": 178 + }, + { + "epoch": 0.34, + "learning_rate": 0.00039710542735902705, + "loss": 0.5307, + "step": 179 + }, + { + "epoch": 0.35, + "learning_rate": 0.0003970318661784564, + "loss": 0.5783, + "step": 180 + }, + { + "epoch": 0.35, + "learning_rate": 0.0003969573889525993, + "loss": 0.5924, + "step": 181 + }, + { + "epoch": 0.35, + "learning_rate": 0.00039688199602771714, + "loss": 0.5902, + "step": 182 + }, + { + "epoch": 0.35, + "learning_rate": 0.00039680568775432855, + "loss": 0.6291, + "step": 183 + }, + { + "epoch": 0.35, + "learning_rate": 0.0003967284644872077, + "loss": 0.5942, + "step": 184 + }, + { + "epoch": 0.36, + "learning_rate": 0.0003966503265853829, + "loss": 0.4878, + "step": 185 + }, + { + "epoch": 0.36, + "learning_rate": 0.0003965712744121347, + "loss": 0.6487, + "step": 186 + }, + { + "epoch": 0.36, + "learning_rate": 0.0003964913083349945, + "loss": 0.6111, + "step": 187 + }, + { + "epoch": 0.36, + "learning_rate": 0.00039641042872574233, + "loss": 0.6072, + "step": 188 + }, + { + "epoch": 0.36, + "learning_rate": 0.00039632863596040575, + "loss": 0.716, + "step": 189 + }, + { + "epoch": 0.37, + "learning_rate": 0.00039624593041925763, + "loss": 0.6178, + "step": 190 + }, + { + "epoch": 0.37, + "learning_rate": 0.0003961623124868145, + "loss": 0.6323, + "step": 191 + }, + { + "epoch": 0.37, + "learning_rate": 0.00039607778255183485, + "loss": 0.5821, + "step": 192 + }, + { + "epoch": 0.37, + "learning_rate": 0.0003959923410073174, + "loss": 0.6738, + "step": 193 + }, + { + "epoch": 0.37, + "learning_rate": 0.0003959059882504989, + "loss": 0.6203, + "step": 194 + }, + { + "epoch": 0.38, + "learning_rate": 0.00039581872468285277, + "loss": 0.632, + "step": 195 + }, + { + "epoch": 0.38, + "learning_rate": 0.0003957305507100868, + "loss": 0.5857, + "step": 196 + }, + { + "epoch": 0.38, + "learning_rate": 0.00039564146674214164, + "loss": 0.6311, + "step": 197 + }, + { + "epoch": 0.38, + "learning_rate": 0.0003955514731931885, + "loss": 0.5889, + "step": 198 + }, + { + "epoch": 0.38, + "learning_rate": 0.00039546057048162763, + "loss": 0.5201, + "step": 199 + }, + { + "epoch": 0.39, + "learning_rate": 0.00039536875903008607, + "loss": 0.5581, + "step": 200 + }, + { + "epoch": 0.39, + "learning_rate": 0.00039527603926541586, + "loss": 0.5104, + "step": 201 + }, + { + "epoch": 0.39, + "learning_rate": 0.00039518241161869193, + "loss": 0.5978, + "step": 202 + }, + { + "epoch": 0.39, + "learning_rate": 0.00039508787652521013, + "loss": 0.6244, + "step": 203 + }, + { + "epoch": 0.39, + "learning_rate": 0.00039499243442448536, + "loss": 0.589, + "step": 204 + }, + { + "epoch": 0.39, + "learning_rate": 0.0003948960857602493, + "loss": 0.575, + "step": 205 + }, + { + "epoch": 0.4, + "learning_rate": 0.0003947988309804485, + "loss": 0.5494, + "step": 206 + }, + { + "epoch": 0.4, + "learning_rate": 0.0003947006705372422, + "loss": 0.4895, + "step": 207 + }, + { + "epoch": 0.4, + "learning_rate": 0.00039460160488700036, + "loss": 0.5479, + "step": 208 + }, + { + "epoch": 0.4, + "learning_rate": 0.00039450163449030124, + "loss": 0.5893, + "step": 209 + }, + { + "epoch": 0.4, + "learning_rate": 0.0003944007598119297, + "loss": 0.5451, + "step": 210 + }, + { + "epoch": 0.41, + "learning_rate": 0.0003942989813208747, + "loss": 0.5582, + "step": 211 + }, + { + "epoch": 0.41, + "learning_rate": 0.0003941962994903273, + "loss": 0.5121, + "step": 212 + }, + { + "epoch": 0.41, + "learning_rate": 0.00039409271479767826, + "loss": 0.6324, + "step": 213 + }, + { + "epoch": 0.41, + "learning_rate": 0.000393988227724516, + "loss": 0.6118, + "step": 214 + }, + { + "epoch": 0.41, + "learning_rate": 0.0003938828387566244, + "loss": 0.6303, + "step": 215 + }, + { + "epoch": 0.42, + "learning_rate": 0.0003937765483839804, + "loss": 0.7705, + "step": 216 + }, + { + "epoch": 0.42, + "learning_rate": 0.0003936693571007517, + "loss": 0.6224, + "step": 217 + }, + { + "epoch": 0.42, + "learning_rate": 0.0003935612654052946, + "loss": 0.5664, + "step": 218 + }, + { + "epoch": 0.42, + "learning_rate": 0.00039345227380015163, + "loss": 0.66, + "step": 219 + }, + { + "epoch": 0.42, + "learning_rate": 0.00039334238279204906, + "loss": 0.5582, + "step": 220 + }, + { + "epoch": 0.43, + "learning_rate": 0.00039323159289189505, + "loss": 0.6087, + "step": 221 + }, + { + "epoch": 0.43, + "learning_rate": 0.0003931199046147764, + "loss": 0.5566, + "step": 222 + }, + { + "epoch": 0.43, + "learning_rate": 0.00039300731847995716, + "loss": 0.5775, + "step": 223 + }, + { + "epoch": 0.43, + "learning_rate": 0.00039289383501087534, + "loss": 0.5081, + "step": 224 + }, + { + "epoch": 0.43, + "learning_rate": 0.00039277945473514104, + "loss": 0.5218, + "step": 225 + }, + { + "epoch": 0.44, + "learning_rate": 0.0003926641781845338, + "loss": 0.6655, + "step": 226 + }, + { + "epoch": 0.44, + "learning_rate": 0.0003925480058950002, + "loss": 0.5735, + "step": 227 + }, + { + "epoch": 0.44, + "learning_rate": 0.00039243093840665114, + "loss": 0.6609, + "step": 228 + }, + { + "epoch": 0.44, + "learning_rate": 0.0003923129762637596, + "loss": 0.7323, + "step": 229 + }, + { + "epoch": 0.44, + "learning_rate": 0.000392194120014758, + "loss": 0.5703, + "step": 230 + }, + { + "epoch": 0.44, + "learning_rate": 0.00039207437021223583, + "loss": 0.6545, + "step": 231 + }, + { + "epoch": 0.45, + "learning_rate": 0.0003919537274129366, + "loss": 0.521, + "step": 232 + }, + { + "epoch": 0.45, + "learning_rate": 0.00039183219217775564, + "loss": 0.5257, + "step": 233 + }, + { + "epoch": 0.45, + "learning_rate": 0.0003917097650717377, + "loss": 0.5487, + "step": 234 + }, + { + "epoch": 0.45, + "learning_rate": 0.00039158644666407365, + "loss": 0.4861, + "step": 235 + }, + { + "epoch": 0.45, + "learning_rate": 0.00039146223752809845, + "loss": 0.4928, + "step": 236 + }, + { + "epoch": 0.46, + "learning_rate": 0.0003913371382412883, + "loss": 0.5253, + "step": 237 + }, + { + "epoch": 0.46, + "learning_rate": 0.00039121114938525756, + "loss": 0.6155, + "step": 238 + }, + { + "epoch": 0.46, + "learning_rate": 0.00039108427154575684, + "loss": 0.55, + "step": 239 + }, + { + "epoch": 0.46, + "learning_rate": 0.00039095650531266967, + "loss": 0.6617, + "step": 240 + }, + { + "epoch": 0.46, + "learning_rate": 0.00039082785128000976, + "loss": 0.5198, + "step": 241 + }, + { + "epoch": 0.47, + "learning_rate": 0.00039069831004591866, + "loss": 0.5302, + "step": 242 + }, + { + "epoch": 0.47, + "learning_rate": 0.0003905678822126625, + "loss": 0.5347, + "step": 243 + }, + { + "epoch": 0.47, + "learning_rate": 0.00039043656838662946, + "loss": 0.531, + "step": 244 + }, + { + "epoch": 0.47, + "learning_rate": 0.00039030436917832697, + "loss": 0.4884, + "step": 245 + }, + { + "epoch": 0.47, + "learning_rate": 0.00039017128520237883, + "loss": 0.6027, + "step": 246 + }, + { + "epoch": 0.48, + "learning_rate": 0.0003900373170775222, + "loss": 0.5537, + "step": 247 + }, + { + "epoch": 0.48, + "learning_rate": 0.00038990246542660494, + "loss": 0.5753, + "step": 248 + }, + { + "epoch": 0.48, + "learning_rate": 0.00038976673087658256, + "loss": 0.5059, + "step": 249 + }, + { + "epoch": 0.48, + "learning_rate": 0.00038963011405851537, + "loss": 0.5118, + "step": 250 + }, + { + "epoch": 0.48, + "learning_rate": 0.00038949261560756565, + "loss": 0.5645, + "step": 251 + }, + { + "epoch": 0.49, + "learning_rate": 0.0003893542361629944, + "loss": 0.5623, + "step": 252 + }, + { + "epoch": 0.49, + "learning_rate": 0.00038921497636815866, + "loss": 0.5216, + "step": 253 + }, + { + "epoch": 0.49, + "learning_rate": 0.0003890748368705085, + "loss": 0.4501, + "step": 254 + }, + { + "epoch": 0.49, + "learning_rate": 0.0003889338183215838, + "loss": 0.48, + "step": 255 + }, + { + "epoch": 0.49, + "learning_rate": 0.00038879192137701135, + "loss": 0.5218, + "step": 256 + }, + { + "epoch": 0.49, + "learning_rate": 0.0003886491466965018, + "loss": 0.5858, + "step": 257 + }, + { + "epoch": 0.5, + "learning_rate": 0.00038850549494384685, + "loss": 0.6124, + "step": 258 + }, + { + "epoch": 0.5, + "learning_rate": 0.00038836096678691536, + "loss": 0.4645, + "step": 259 + }, + { + "epoch": 0.5, + "learning_rate": 0.00038821556289765136, + "loss": 0.474, + "step": 260 + }, + { + "epoch": 0.5, + "learning_rate": 0.00038806928395207003, + "loss": 0.4364, + "step": 261 + }, + { + "epoch": 0.5, + "learning_rate": 0.00038792213063025484, + "loss": 0.5821, + "step": 262 + }, + { + "epoch": 0.51, + "learning_rate": 0.0003877741036163547, + "loss": 0.5393, + "step": 263 + }, + { + "epoch": 0.51, + "learning_rate": 0.0003876252035985804, + "loss": 0.5373, + "step": 264 + }, + { + "epoch": 0.51, + "learning_rate": 0.0003874754312692013, + "loss": 0.6021, + "step": 265 + }, + { + "epoch": 0.51, + "learning_rate": 0.0003873247873245426, + "loss": 0.4549, + "step": 266 + }, + { + "epoch": 0.51, + "learning_rate": 0.0003871732724649817, + "loss": 0.5994, + "step": 267 + }, + { + "epoch": 0.52, + "learning_rate": 0.0003870208873949453, + "loss": 0.4764, + "step": 268 + }, + { + "epoch": 0.52, + "learning_rate": 0.00038686763282290556, + "loss": 0.4311, + "step": 269 + }, + { + "epoch": 0.52, + "learning_rate": 0.0003867135094613774, + "loss": 0.5462, + "step": 270 + }, + { + "epoch": 0.52, + "learning_rate": 0.0003865585180269148, + "loss": 0.5006, + "step": 271 + }, + { + "epoch": 0.52, + "learning_rate": 0.0003864026592401076, + "loss": 0.5347, + "step": 272 + }, + { + "epoch": 0.53, + "learning_rate": 0.00038624593382557835, + "loss": 0.5242, + "step": 273 + }, + { + "epoch": 0.53, + "learning_rate": 0.00038608834251197856, + "loss": 0.5005, + "step": 274 + }, + { + "epoch": 0.53, + "learning_rate": 0.00038592988603198554, + "loss": 0.5436, + "step": 275 + }, + { + "epoch": 0.53, + "learning_rate": 0.000385770565122299, + "loss": 0.4658, + "step": 276 + }, + { + "epoch": 0.53, + "learning_rate": 0.0003856103805236375, + "loss": 0.5273, + "step": 277 + }, + { + "epoch": 0.54, + "learning_rate": 0.00038544933298073516, + "loss": 0.436, + "step": 278 + }, + { + "epoch": 0.54, + "learning_rate": 0.00038528742324233804, + "loss": 0.4785, + "step": 279 + }, + { + "epoch": 0.54, + "learning_rate": 0.00038512465206120086, + "loss": 0.5366, + "step": 280 + }, + { + "epoch": 0.54, + "learning_rate": 0.00038496102019408324, + "loss": 0.4448, + "step": 281 + }, + { + "epoch": 0.54, + "learning_rate": 0.00038479652840174637, + "loss": 0.5132, + "step": 282 + }, + { + "epoch": 0.54, + "learning_rate": 0.00038463117744894955, + "loss": 0.7918, + "step": 283 + }, + { + "epoch": 0.55, + "learning_rate": 0.00038446496810444627, + "loss": 0.5309, + "step": 284 + }, + { + "epoch": 0.55, + "learning_rate": 0.00038429790114098114, + "loss": 0.5316, + "step": 285 + }, + { + "epoch": 0.55, + "learning_rate": 0.00038412997733528576, + "loss": 0.4611, + "step": 286 + }, + { + "epoch": 0.55, + "learning_rate": 0.00038396119746807563, + "loss": 0.4609, + "step": 287 + }, + { + "epoch": 0.55, + "learning_rate": 0.00038379156232404613, + "loss": 0.5821, + "step": 288 + }, + { + "epoch": 0.56, + "learning_rate": 0.0003836210726918691, + "loss": 0.5883, + "step": 289 + }, + { + "epoch": 0.56, + "learning_rate": 0.0003834497293641889, + "loss": 0.5012, + "step": 290 + }, + { + "epoch": 0.56, + "learning_rate": 0.00038327753313761913, + "loss": 0.4457, + "step": 291 + }, + { + "epoch": 0.56, + "learning_rate": 0.00038310448481273867, + "loss": 0.4851, + "step": 292 + }, + { + "epoch": 0.56, + "learning_rate": 0.00038293058519408787, + "loss": 0.5622, + "step": 293 + }, + { + "epoch": 0.57, + "learning_rate": 0.00038275583509016507, + "loss": 0.5703, + "step": 294 + }, + { + "epoch": 0.57, + "learning_rate": 0.00038258023531342265, + "loss": 0.5718, + "step": 295 + }, + { + "epoch": 0.57, + "learning_rate": 0.0003824037866802632, + "loss": 0.5183, + "step": 296 + }, + { + "epoch": 0.57, + "learning_rate": 0.00038222649001103614, + "loss": 0.5085, + "step": 297 + }, + { + "epoch": 0.57, + "learning_rate": 0.00038204834613003323, + "loss": 0.5388, + "step": 298 + }, + { + "epoch": 0.58, + "learning_rate": 0.00038186935586548537, + "loss": 0.5425, + "step": 299 + }, + { + "epoch": 0.58, + "learning_rate": 0.0003816895200495584, + "loss": 0.447, + "step": 300 + }, + { + "epoch": 0.58, + "learning_rate": 0.0003815088395183493, + "loss": 0.5541, + "step": 301 + }, + { + "epoch": 0.58, + "learning_rate": 0.00038132731511188227, + "loss": 0.5518, + "step": 302 + }, + { + "epoch": 0.58, + "learning_rate": 0.000381144947674105, + "loss": 0.5074, + "step": 303 + }, + { + "epoch": 0.59, + "learning_rate": 0.0003809617380528847, + "loss": 0.5134, + "step": 304 + }, + { + "epoch": 0.59, + "learning_rate": 0.0003807776871000037, + "loss": 0.4599, + "step": 305 + }, + { + "epoch": 0.59, + "learning_rate": 0.0003805927956711562, + "loss": 0.5838, + "step": 306 + }, + { + "epoch": 0.59, + "learning_rate": 0.00038040706462594395, + "loss": 0.5216, + "step": 307 + }, + { + "epoch": 0.59, + "learning_rate": 0.00038022049482787216, + "loss": 0.5323, + "step": 308 + }, + { + "epoch": 0.6, + "learning_rate": 0.0003800330871443456, + "loss": 0.5681, + "step": 309 + }, + { + "epoch": 0.6, + "learning_rate": 0.00037984484244666446, + "loss": 0.4172, + "step": 310 + }, + { + "epoch": 0.6, + "learning_rate": 0.0003796557616100207, + "loss": 0.4958, + "step": 311 + }, + { + "epoch": 0.6, + "learning_rate": 0.0003794658455134934, + "loss": 0.662, + "step": 312 + }, + { + "epoch": 0.6, + "learning_rate": 0.0003792750950400451, + "loss": 0.5832, + "step": 313 + }, + { + "epoch": 0.6, + "learning_rate": 0.0003790835110765174, + "loss": 0.4271, + "step": 314 + }, + { + "epoch": 0.61, + "learning_rate": 0.0003788910945136271, + "loss": 0.4842, + "step": 315 + }, + { + "epoch": 0.61, + "learning_rate": 0.00037869784624596186, + "loss": 0.4656, + "step": 316 + }, + { + "epoch": 0.61, + "learning_rate": 0.00037850376717197626, + "loss": 0.4981, + "step": 317 + }, + { + "epoch": 0.61, + "learning_rate": 0.00037830885819398733, + "loss": 0.5162, + "step": 318 + }, + { + "epoch": 0.61, + "learning_rate": 0.00037811312021817067, + "loss": 0.652, + "step": 319 + }, + { + "epoch": 0.62, + "learning_rate": 0.0003779165541545558, + "loss": 0.5104, + "step": 320 + }, + { + "epoch": 0.62, + "learning_rate": 0.0003777191609170225, + "loss": 0.4971, + "step": 321 + }, + { + "epoch": 0.62, + "learning_rate": 0.0003775209414232962, + "loss": 0.4871, + "step": 322 + }, + { + "epoch": 0.62, + "learning_rate": 0.0003773218965949436, + "loss": 0.5226, + "step": 323 + }, + { + "epoch": 0.62, + "learning_rate": 0.00037712202735736884, + "loss": 0.4823, + "step": 324 + }, + { + "epoch": 0.63, + "learning_rate": 0.0003769213346398087, + "loss": 0.497, + "step": 325 + }, + { + "epoch": 0.63, + "learning_rate": 0.0003767198193753286, + "loss": 0.5976, + "step": 326 + }, + { + "epoch": 0.63, + "learning_rate": 0.0003765174825008181, + "loss": 0.4532, + "step": 327 + }, + { + "epoch": 0.63, + "learning_rate": 0.0003763143249569868, + "loss": 0.5236, + "step": 328 + }, + { + "epoch": 0.63, + "learning_rate": 0.00037611034768835947, + "loss": 0.6513, + "step": 329 + }, + { + "epoch": 0.64, + "learning_rate": 0.00037590555164327224, + "loss": 0.5686, + "step": 330 + }, + { + "epoch": 0.64, + "learning_rate": 0.00037569993777386774, + "loss": 0.456, + "step": 331 + }, + { + "epoch": 0.64, + "learning_rate": 0.0003754935070360909, + "loss": 0.5181, + "step": 332 + }, + { + "epoch": 0.64, + "learning_rate": 0.0003752862603896846, + "loss": 0.4765, + "step": 333 + }, + { + "epoch": 0.64, + "learning_rate": 0.00037507819879818477, + "loss": 0.5363, + "step": 334 + }, + { + "epoch": 0.65, + "learning_rate": 0.00037486932322891646, + "loss": 0.4584, + "step": 335 + }, + { + "epoch": 0.65, + "learning_rate": 0.00037465963465298886, + "loss": 0.5428, + "step": 336 + }, + { + "epoch": 0.65, + "learning_rate": 0.0003744491340452913, + "loss": 0.3927, + "step": 337 + }, + { + "epoch": 0.65, + "learning_rate": 0.0003742378223844882, + "loss": 0.5478, + "step": 338 + }, + { + "epoch": 0.65, + "learning_rate": 0.0003740257006530147, + "loss": 0.469, + "step": 339 + }, + { + "epoch": 0.65, + "learning_rate": 0.00037381276983707246, + "loss": 0.5169, + "step": 340 + }, + { + "epoch": 0.66, + "learning_rate": 0.00037359903092662434, + "loss": 0.4797, + "step": 341 + }, + { + "epoch": 0.66, + "learning_rate": 0.00037338448491539054, + "loss": 0.5315, + "step": 342 + }, + { + "epoch": 0.66, + "learning_rate": 0.00037316913280084353, + "loss": 0.4422, + "step": 343 + }, + { + "epoch": 0.66, + "learning_rate": 0.0003729529755842035, + "loss": 0.4426, + "step": 344 + }, + { + "epoch": 0.66, + "learning_rate": 0.0003727360142704337, + "loss": 0.4718, + "step": 345 + }, + { + "epoch": 0.67, + "learning_rate": 0.0003725182498682361, + "loss": 0.5585, + "step": 346 + }, + { + "epoch": 0.67, + "learning_rate": 0.0003722996833900459, + "loss": 0.4775, + "step": 347 + }, + { + "epoch": 0.67, + "learning_rate": 0.0003720803158520279, + "loss": 0.6014, + "step": 348 + }, + { + "epoch": 0.67, + "learning_rate": 0.00037186014827407076, + "loss": 0.5117, + "step": 349 + }, + { + "epoch": 0.67, + "learning_rate": 0.0003716391816797829, + "loss": 0.5404, + "step": 350 + }, + { + "epoch": 0.68, + "learning_rate": 0.0003714174170964876, + "loss": 0.527, + "step": 351 + }, + { + "epoch": 0.68, + "learning_rate": 0.00037119485555521796, + "loss": 0.4555, + "step": 352 + }, + { + "epoch": 0.68, + "learning_rate": 0.00037097149809071255, + "loss": 0.5372, + "step": 353 + }, + { + "epoch": 0.68, + "learning_rate": 0.00037074734574141016, + "loss": 0.5377, + "step": 354 + }, + { + "epoch": 0.68, + "learning_rate": 0.0003705223995494454, + "loss": 0.4925, + "step": 355 + }, + { + "epoch": 0.69, + "learning_rate": 0.00037029666056064345, + "loss": 0.482, + "step": 356 + }, + { + "epoch": 0.69, + "learning_rate": 0.00037007012982451546, + "loss": 0.5235, + "step": 357 + }, + { + "epoch": 0.69, + "learning_rate": 0.00036984280839425356, + "loss": 0.4957, + "step": 358 + }, + { + "epoch": 0.69, + "learning_rate": 0.000369614697326726, + "loss": 0.5379, + "step": 359 + }, + { + "epoch": 0.69, + "learning_rate": 0.0003693857976824721, + "loss": 0.4653, + "step": 360 + }, + { + "epoch": 0.7, + "learning_rate": 0.00036915611052569785, + "loss": 0.469, + "step": 361 + }, + { + "epoch": 0.7, + "learning_rate": 0.0003689256369242702, + "loss": 0.5618, + "step": 362 + }, + { + "epoch": 0.7, + "learning_rate": 0.0003686943779497124, + "loss": 0.4459, + "step": 363 + }, + { + "epoch": 0.7, + "learning_rate": 0.0003684623346771995, + "loss": 0.5606, + "step": 364 + }, + { + "epoch": 0.7, + "learning_rate": 0.0003682295081855524, + "loss": 0.4368, + "step": 365 + }, + { + "epoch": 0.7, + "learning_rate": 0.00036799589955723375, + "loss": 0.4168, + "step": 366 + }, + { + "epoch": 0.71, + "learning_rate": 0.00036776150987834243, + "loss": 0.4664, + "step": 367 + }, + { + "epoch": 0.71, + "learning_rate": 0.00036752634023860846, + "loss": 0.4737, + "step": 368 + }, + { + "epoch": 0.71, + "learning_rate": 0.0003672903917313883, + "loss": 0.4247, + "step": 369 + }, + { + "epoch": 0.71, + "learning_rate": 0.00036705366545365935, + "loss": 0.5677, + "step": 370 + }, + { + "epoch": 0.71, + "learning_rate": 0.00036681616250601505, + "loss": 0.5441, + "step": 371 + }, + { + "epoch": 0.72, + "learning_rate": 0.0003665778839926599, + "loss": 0.6247, + "step": 372 + }, + { + "epoch": 0.72, + "learning_rate": 0.00036633883102140405, + "loss": 0.5217, + "step": 373 + }, + { + "epoch": 0.72, + "learning_rate": 0.0003660990047036584, + "loss": 0.4651, + "step": 374 + }, + { + "epoch": 0.72, + "learning_rate": 0.0003658584061544291, + "loss": 0.4648, + "step": 375 + }, + { + "epoch": 0.72, + "learning_rate": 0.0003656170364923128, + "loss": 0.6048, + "step": 376 + }, + { + "epoch": 0.73, + "learning_rate": 0.00036537489683949114, + "loss": 0.4515, + "step": 377 + }, + { + "epoch": 0.73, + "learning_rate": 0.0003651319883217255, + "loss": 0.5096, + "step": 378 + }, + { + "epoch": 0.73, + "learning_rate": 0.00036488831206835207, + "loss": 0.4231, + "step": 379 + }, + { + "epoch": 0.73, + "learning_rate": 0.00036464386921227637, + "loss": 0.4903, + "step": 380 + }, + { + "epoch": 0.73, + "learning_rate": 0.00036439866088996796, + "loss": 0.5131, + "step": 381 + }, + { + "epoch": 0.74, + "learning_rate": 0.0003641526882414553, + "loss": 0.5986, + "step": 382 + }, + { + "epoch": 0.74, + "learning_rate": 0.0003639059524103203, + "loss": 0.6, + "step": 383 + }, + { + "epoch": 0.74, + "learning_rate": 0.0003636584545436931, + "loss": 0.5216, + "step": 384 + }, + { + "epoch": 0.74, + "learning_rate": 0.0003634101957922468, + "loss": 0.5144, + "step": 385 + }, + { + "epoch": 0.74, + "learning_rate": 0.00036316117731019184, + "loss": 0.4963, + "step": 386 + }, + { + "epoch": 0.75, + "learning_rate": 0.0003629114002552711, + "loss": 0.5657, + "step": 387 + }, + { + "epoch": 0.75, + "learning_rate": 0.00036266086578875384, + "loss": 0.5028, + "step": 388 + }, + { + "epoch": 0.75, + "learning_rate": 0.0003624095750754311, + "loss": 0.573, + "step": 389 + }, + { + "epoch": 0.75, + "learning_rate": 0.00036215752928360967, + "loss": 0.5199, + "step": 390 + }, + { + "epoch": 0.75, + "learning_rate": 0.0003619047295851068, + "loss": 0.656, + "step": 391 + }, + { + "epoch": 0.75, + "learning_rate": 0.00036165117715524506, + "loss": 0.5129, + "step": 392 + }, + { + "epoch": 0.76, + "learning_rate": 0.00036139687317284647, + "loss": 0.3945, + "step": 393 + }, + { + "epoch": 0.76, + "learning_rate": 0.0003611418188202271, + "loss": 0.5318, + "step": 394 + }, + { + "epoch": 0.76, + "learning_rate": 0.00036088601528319196, + "loss": 0.5344, + "step": 395 + }, + { + "epoch": 0.76, + "learning_rate": 0.00036062946375102885, + "loss": 0.5407, + "step": 396 + }, + { + "epoch": 0.76, + "learning_rate": 0.0003603721654165034, + "loss": 0.5364, + "step": 397 + }, + { + "epoch": 0.77, + "learning_rate": 0.00036011412147585306, + "loss": 0.5407, + "step": 398 + }, + { + "epoch": 0.77, + "learning_rate": 0.0003598553331287821, + "loss": 0.5999, + "step": 399 + }, + { + "epoch": 0.77, + "learning_rate": 0.0003595958015784555, + "loss": 0.624, + "step": 400 + }, + { + "epoch": 0.77, + "learning_rate": 0.00035933552803149354, + "loss": 0.5351, + "step": 401 + }, + { + "epoch": 0.77, + "learning_rate": 0.0003590745136979662, + "loss": 0.5196, + "step": 402 + }, + { + "epoch": 0.78, + "learning_rate": 0.00035881275979138765, + "loss": 0.5447, + "step": 403 + }, + { + "epoch": 0.78, + "learning_rate": 0.0003585502675287104, + "loss": 0.4908, + "step": 404 + }, + { + "epoch": 0.78, + "learning_rate": 0.00035828703813031986, + "loss": 0.5172, + "step": 405 + }, + { + "epoch": 0.78, + "learning_rate": 0.00035802307282002834, + "loss": 0.5923, + "step": 406 + }, + { + "epoch": 0.78, + "learning_rate": 0.0003577583728250699, + "loss": 0.568, + "step": 407 + }, + { + "epoch": 0.79, + "learning_rate": 0.00035749293937609395, + "loss": 0.4618, + "step": 408 + }, + { + "epoch": 0.79, + "learning_rate": 0.0003572267737071601, + "loss": 0.5351, + "step": 409 + }, + { + "epoch": 0.79, + "learning_rate": 0.0003569598770557322, + "loss": 0.5285, + "step": 410 + }, + { + "epoch": 0.79, + "learning_rate": 0.00035669225066267256, + "loss": 0.4571, + "step": 411 + }, + { + "epoch": 0.79, + "learning_rate": 0.00035642389577223625, + "loss": 0.4214, + "step": 412 + }, + { + "epoch": 0.8, + "learning_rate": 0.0003561548136320653, + "loss": 0.5393, + "step": 413 + }, + { + "epoch": 0.8, + "learning_rate": 0.0003558850054931828, + "loss": 0.549, + "step": 414 + }, + { + "epoch": 0.8, + "learning_rate": 0.00035561447260998714, + "loss": 0.4824, + "step": 415 + }, + { + "epoch": 0.8, + "learning_rate": 0.00035534321624024656, + "loss": 0.6244, + "step": 416 + }, + { + "epoch": 0.8, + "learning_rate": 0.00035507123764509245, + "loss": 0.5436, + "step": 417 + }, + { + "epoch": 0.8, + "learning_rate": 0.0003547985380890144, + "loss": 0.5198, + "step": 418 + }, + { + "epoch": 0.81, + "learning_rate": 0.00035452511883985366, + "loss": 0.5979, + "step": 419 + }, + { + "epoch": 0.81, + "learning_rate": 0.00035425098116879754, + "loss": 0.4158, + "step": 420 + }, + { + "epoch": 0.81, + "learning_rate": 0.00035397612635037356, + "loss": 0.5125, + "step": 421 + }, + { + "epoch": 0.81, + "learning_rate": 0.00035370055566244334, + "loss": 0.4699, + "step": 422 + }, + { + "epoch": 0.81, + "learning_rate": 0.0003534242703861966, + "loss": 0.5553, + "step": 423 + }, + { + "epoch": 0.82, + "learning_rate": 0.00035314727180614573, + "loss": 0.5969, + "step": 424 + }, + { + "epoch": 0.82, + "learning_rate": 0.00035286956121011897, + "loss": 0.456, + "step": 425 + }, + { + "epoch": 0.82, + "learning_rate": 0.0003525911398892552, + "loss": 0.5195, + "step": 426 + }, + { + "epoch": 0.82, + "learning_rate": 0.0003523120091379975, + "loss": 0.5187, + "step": 427 + }, + { + "epoch": 0.82, + "learning_rate": 0.00035203217025408726, + "loss": 0.5443, + "step": 428 + }, + { + "epoch": 0.83, + "learning_rate": 0.0003517516245385582, + "loss": 0.4476, + "step": 429 + }, + { + "epoch": 0.83, + "learning_rate": 0.0003514703732957301, + "loss": 0.5757, + "step": 430 + }, + { + "epoch": 0.83, + "learning_rate": 0.00035118841783320304, + "loss": 0.5129, + "step": 431 + }, + { + "epoch": 0.83, + "learning_rate": 0.00035090575946185114, + "loss": 0.6354, + "step": 432 + }, + { + "epoch": 0.83, + "learning_rate": 0.00035062239949581645, + "loss": 0.4065, + "step": 433 + }, + { + "epoch": 0.84, + "learning_rate": 0.000350338339252503, + "loss": 0.5472, + "step": 434 + }, + { + "epoch": 0.84, + "learning_rate": 0.00035005358005257045, + "loss": 0.5424, + "step": 435 + }, + { + "epoch": 0.84, + "learning_rate": 0.00034976812321992816, + "loss": 0.6127, + "step": 436 + }, + { + "epoch": 0.84, + "learning_rate": 0.00034948197008172877, + "loss": 0.63, + "step": 437 + }, + { + "epoch": 0.84, + "learning_rate": 0.0003491951219683625, + "loss": 0.413, + "step": 438 + }, + { + "epoch": 0.85, + "learning_rate": 0.00034890758021345034, + "loss": 0.5435, + "step": 439 + }, + { + "epoch": 0.85, + "learning_rate": 0.00034861934615383844, + "loss": 0.5433, + "step": 440 + }, + { + "epoch": 0.85, + "learning_rate": 0.00034833042112959153, + "loss": 0.4763, + "step": 441 + }, + { + "epoch": 0.85, + "learning_rate": 0.00034804080648398667, + "loss": 0.5727, + "step": 442 + }, + { + "epoch": 0.85, + "learning_rate": 0.00034775050356350727, + "loss": 0.5392, + "step": 443 + }, + { + "epoch": 0.85, + "learning_rate": 0.00034745951371783666, + "loss": 0.4981, + "step": 444 + }, + { + "epoch": 0.86, + "learning_rate": 0.0003471678382998518, + "loss": 0.5516, + "step": 445 + }, + { + "epoch": 0.86, + "learning_rate": 0.00034687547866561703, + "loss": 0.4965, + "step": 446 + }, + { + "epoch": 0.86, + "learning_rate": 0.0003465824361743779, + "loss": 0.4982, + "step": 447 + }, + { + "epoch": 0.86, + "learning_rate": 0.0003462887121885544, + "loss": 0.5619, + "step": 448 + }, + { + "epoch": 0.86, + "learning_rate": 0.0003459943080737353, + "loss": 0.5273, + "step": 449 + }, + { + "epoch": 0.87, + "learning_rate": 0.00034569922519867133, + "loss": 0.517, + "step": 450 + }, + { + "epoch": 0.87, + "learning_rate": 0.00034540346493526876, + "loss": 0.4874, + "step": 451 + }, + { + "epoch": 0.87, + "learning_rate": 0.0003451070286585833, + "loss": 0.5966, + "step": 452 + }, + { + "epoch": 0.87, + "learning_rate": 0.0003448099177468137, + "loss": 0.4487, + "step": 453 + }, + { + "epoch": 0.87, + "learning_rate": 0.0003445121335812951, + "loss": 0.5091, + "step": 454 + }, + { + "epoch": 0.88, + "learning_rate": 0.0003442136775464929, + "loss": 0.407, + "step": 455 + }, + { + "epoch": 0.88, + "learning_rate": 0.0003439145510299958, + "loss": 0.6327, + "step": 456 + }, + { + "epoch": 0.88, + "learning_rate": 0.00034361475542251025, + "loss": 0.4217, + "step": 457 + }, + { + "epoch": 0.88, + "learning_rate": 0.0003433142921178531, + "loss": 0.6102, + "step": 458 + }, + { + "epoch": 0.88, + "learning_rate": 0.0003430131625129456, + "loss": 0.5556, + "step": 459 + }, + { + "epoch": 0.89, + "learning_rate": 0.00034271136800780673, + "loss": 0.4986, + "step": 460 + }, + { + "epoch": 0.89, + "learning_rate": 0.0003424089100055467, + "loss": 0.5406, + "step": 461 + }, + { + "epoch": 0.89, + "learning_rate": 0.00034210578991236056, + "loss": 0.5881, + "step": 462 + }, + { + "epoch": 0.89, + "learning_rate": 0.00034180200913752157, + "loss": 0.4869, + "step": 463 + }, + { + "epoch": 0.89, + "learning_rate": 0.00034149756909337454, + "loss": 0.5626, + "step": 464 + }, + { + "epoch": 0.9, + "learning_rate": 0.0003411924711953295, + "loss": 0.564, + "step": 465 + }, + { + "epoch": 0.9, + "learning_rate": 0.00034088671686185486, + "loss": 0.6272, + "step": 466 + }, + { + "epoch": 0.9, + "learning_rate": 0.0003405803075144711, + "loss": 0.4643, + "step": 467 + }, + { + "epoch": 0.9, + "learning_rate": 0.0003402732445777438, + "loss": 0.5435, + "step": 468 + }, + { + "epoch": 0.9, + "learning_rate": 0.00033996552947927744, + "loss": 0.5844, + "step": 469 + }, + { + "epoch": 0.91, + "learning_rate": 0.0003396571636497084, + "loss": 0.5362, + "step": 470 + }, + { + "epoch": 0.91, + "learning_rate": 0.00033934814852269865, + "loss": 0.5607, + "step": 471 + }, + { + "epoch": 0.91, + "learning_rate": 0.0003390384855349285, + "loss": 0.4836, + "step": 472 + }, + { + "epoch": 0.91, + "learning_rate": 0.00033872817612609065, + "loss": 0.6555, + "step": 473 + }, + { + "epoch": 0.91, + "learning_rate": 0.00033841722173888315, + "loss": 0.4784, + "step": 474 + }, + { + "epoch": 0.91, + "learning_rate": 0.00033810562381900253, + "loss": 0.5583, + "step": 475 + }, + { + "epoch": 0.92, + "learning_rate": 0.00033779338381513736, + "loss": 0.4679, + "step": 476 + }, + { + "epoch": 0.92, + "learning_rate": 0.0003374805031789613, + "loss": 0.5325, + "step": 477 + }, + { + "epoch": 0.92, + "learning_rate": 0.00033716698336512654, + "loss": 0.6601, + "step": 478 + }, + { + "epoch": 0.92, + "learning_rate": 0.000336852825831257, + "loss": 0.4838, + "step": 479 + }, + { + "epoch": 0.92, + "learning_rate": 0.0003365380320379414, + "loss": 0.5588, + "step": 480 + }, + { + "epoch": 0.93, + "learning_rate": 0.00033622260344872665, + "loss": 0.4596, + "step": 481 + }, + { + "epoch": 0.93, + "learning_rate": 0.0003359065415301108, + "loss": 0.5228, + "step": 482 + }, + { + "epoch": 0.93, + "learning_rate": 0.00033558984775153663, + "loss": 0.5316, + "step": 483 + }, + { + "epoch": 0.93, + "learning_rate": 0.00033527252358538437, + "loss": 0.4761, + "step": 484 + }, + { + "epoch": 0.93, + "learning_rate": 0.0003349545705069653, + "loss": 0.5254, + "step": 485 + }, + { + "epoch": 0.94, + "learning_rate": 0.0003346359899945144, + "loss": 0.4786, + "step": 486 + }, + { + "epoch": 0.94, + "learning_rate": 0.00033431678352918384, + "loss": 0.4302, + "step": 487 + }, + { + "epoch": 0.94, + "learning_rate": 0.0003339969525950361, + "loss": 0.4914, + "step": 488 + }, + { + "epoch": 0.94, + "learning_rate": 0.00033367649867903663, + "loss": 0.4102, + "step": 489 + }, + { + "epoch": 0.94, + "learning_rate": 0.0003333554232710477, + "loss": 0.4698, + "step": 490 + }, + { + "epoch": 0.95, + "learning_rate": 0.0003330337278638207, + "loss": 0.4454, + "step": 491 + }, + { + "epoch": 0.95, + "learning_rate": 0.00033271141395298964, + "loss": 0.4648, + "step": 492 + }, + { + "epoch": 0.95, + "learning_rate": 0.00033238848303706415, + "loss": 0.4616, + "step": 493 + }, + { + "epoch": 0.95, + "learning_rate": 0.00033206493661742237, + "loss": 0.4861, + "step": 494 + }, + { + "epoch": 0.95, + "learning_rate": 0.00033174077619830416, + "loss": 0.4797, + "step": 495 + }, + { + "epoch": 0.96, + "learning_rate": 0.00033141600328680373, + "loss": 0.5104, + "step": 496 + }, + { + "epoch": 0.96, + "learning_rate": 0.00033109061939286336, + "loss": 0.5712, + "step": 497 + }, + { + "epoch": 0.96, + "learning_rate": 0.00033076462602926553, + "loss": 0.5425, + "step": 498 + }, + { + "epoch": 0.96, + "learning_rate": 0.00033043802471162636, + "loss": 0.6156, + "step": 499 + }, + { + "epoch": 0.96, + "learning_rate": 0.0003301108169583887, + "loss": 0.4282, + "step": 500 + }, + { + "epoch": 0.96, + "learning_rate": 0.0003297830042908146, + "loss": 0.4088, + "step": 501 + }, + { + "epoch": 0.97, + "learning_rate": 0.00032945458823297857, + "loss": 0.4866, + "step": 502 + }, + { + "epoch": 0.97, + "learning_rate": 0.0003291255703117605, + "loss": 0.5045, + "step": 503 + }, + { + "epoch": 0.97, + "learning_rate": 0.0003287959520568384, + "loss": 0.491, + "step": 504 + }, + { + "epoch": 0.97, + "learning_rate": 0.00032846573500068136, + "loss": 0.458, + "step": 505 + }, + { + "epoch": 0.97, + "learning_rate": 0.00032813492067854246, + "loss": 0.4508, + "step": 506 + }, + { + "epoch": 0.98, + "learning_rate": 0.0003278035106284516, + "loss": 0.4294, + "step": 507 + }, + { + "epoch": 0.98, + "learning_rate": 0.00032747150639120834, + "loss": 0.4834, + "step": 508 + }, + { + "epoch": 0.98, + "learning_rate": 0.00032713890951037477, + "loss": 0.3857, + "step": 509 + }, + { + "epoch": 0.98, + "learning_rate": 0.00032680572153226834, + "loss": 0.4072, + "step": 510 + }, + { + "epoch": 0.98, + "learning_rate": 0.0003264719440059545, + "loss": 0.4028, + "step": 511 + }, + { + "epoch": 0.99, + "learning_rate": 0.00032613757848323977, + "loss": 0.3789, + "step": 512 + }, + { + "epoch": 0.99, + "learning_rate": 0.00032580262651866446, + "loss": 0.4944, + "step": 513 + }, + { + "epoch": 0.99, + "learning_rate": 0.0003254670896694952, + "loss": 0.4259, + "step": 514 + }, + { + "epoch": 0.99, + "learning_rate": 0.00032513096949571805, + "loss": 0.5037, + "step": 515 + }, + { + "epoch": 0.99, + "learning_rate": 0.00032479426756003093, + "loss": 0.5857, + "step": 516 + }, + { + "epoch": 1.0, + "learning_rate": 0.0003244569854278366, + "loss": 0.5407, + "step": 517 + }, + { + "epoch": 1.0, + "learning_rate": 0.00032411912466723524, + "loss": 0.499, + "step": 518 + }, + { + "epoch": 1.0, + "learning_rate": 0.0003237806868490172, + "loss": 0.4359, + "step": 519 + }, + { + "epoch": 1.0, + "learning_rate": 0.00032344167354665573, + "loss": 0.4374, + "step": 520 + }, + { + "epoch": 1.0, + "learning_rate": 0.0003231020863362997, + "loss": 0.4172, + "step": 521 + }, + { + "epoch": 1.01, + "learning_rate": 0.000322761926796766, + "loss": 0.4451, + "step": 522 + }, + { + "epoch": 1.01, + "learning_rate": 0.0003224211965095326, + "loss": 0.4, + "step": 523 + }, + { + "epoch": 1.01, + "learning_rate": 0.0003220798970587309, + "loss": 0.4009, + "step": 524 + }, + { + "epoch": 1.01, + "learning_rate": 0.0003217380300311386, + "loss": 0.3966, + "step": 525 + }, + { + "epoch": 1.01, + "learning_rate": 0.000321395597016172, + "loss": 0.4255, + "step": 526 + }, + { + "epoch": 1.01, + "learning_rate": 0.00032105259960587895, + "loss": 0.4707, + "step": 527 + }, + { + "epoch": 1.02, + "learning_rate": 0.00032070903939493124, + "loss": 0.5313, + "step": 528 + }, + { + "epoch": 1.02, + "learning_rate": 0.0003203649179806172, + "loss": 0.3596, + "step": 529 + }, + { + "epoch": 1.02, + "learning_rate": 0.0003200202369628345, + "loss": 0.5223, + "step": 530 + }, + { + "epoch": 1.02, + "learning_rate": 0.00031967499794408234, + "loss": 0.4146, + "step": 531 + }, + { + "epoch": 1.02, + "learning_rate": 0.00031932920252945423, + "loss": 0.4328, + "step": 532 + }, + { + "epoch": 1.03, + "learning_rate": 0.0003189828523266306, + "loss": 0.4258, + "step": 533 + }, + { + "epoch": 1.03, + "learning_rate": 0.00031863594894587105, + "loss": 0.4457, + "step": 534 + }, + { + "epoch": 1.03, + "learning_rate": 0.0003182884940000072, + "loss": 0.5249, + "step": 535 + }, + { + "epoch": 1.03, + "learning_rate": 0.0003179404891044348, + "loss": 0.4751, + "step": 536 + }, + { + "epoch": 1.03, + "learning_rate": 0.00031759193587710676, + "loss": 0.5378, + "step": 537 + }, + { + "epoch": 1.04, + "learning_rate": 0.00031724283593852497, + "loss": 0.634, + "step": 538 + }, + { + "epoch": 1.04, + "learning_rate": 0.00031689319091173326, + "loss": 0.4298, + "step": 539 + }, + { + "epoch": 1.04, + "learning_rate": 0.00031654300242230977, + "loss": 0.5469, + "step": 540 + }, + { + "epoch": 1.04, + "learning_rate": 0.00031619227209835917, + "loss": 0.5153, + "step": 541 + }, + { + "epoch": 1.04, + "learning_rate": 0.0003158410015705053, + "loss": 0.4144, + "step": 542 + }, + { + "epoch": 1.05, + "learning_rate": 0.0003154891924718837, + "loss": 0.6041, + "step": 543 + }, + { + "epoch": 1.05, + "learning_rate": 0.0003151368464381335, + "loss": 0.4891, + "step": 544 + }, + { + "epoch": 1.05, + "learning_rate": 0.0003147839651073904, + "loss": 0.5258, + "step": 545 + }, + { + "epoch": 1.05, + "learning_rate": 0.00031443055012027874, + "loss": 0.4351, + "step": 546 + }, + { + "epoch": 1.05, + "learning_rate": 0.000314076603119904, + "loss": 0.4556, + "step": 547 + }, + { + "epoch": 1.06, + "learning_rate": 0.00031372212575184514, + "loss": 0.5445, + "step": 548 + }, + { + "epoch": 1.06, + "learning_rate": 0.00031336711966414675, + "loss": 0.5585, + "step": 549 + }, + { + "epoch": 1.06, + "learning_rate": 0.0003130115865073117, + "loss": 0.367, + "step": 550 + }, + { + "epoch": 1.06, + "learning_rate": 0.0003126555279342933, + "loss": 0.4877, + "step": 551 + }, + { + "epoch": 1.06, + "learning_rate": 0.0003122989456004876, + "loss": 0.4335, + "step": 552 + }, + { + "epoch": 1.07, + "learning_rate": 0.0003119418411637258, + "loss": 0.4383, + "step": 553 + }, + { + "epoch": 1.07, + "learning_rate": 0.0003115842162842663, + "loss": 0.4508, + "step": 554 + }, + { + "epoch": 1.07, + "learning_rate": 0.00031122607262478743, + "loss": 0.4631, + "step": 555 + }, + { + "epoch": 1.07, + "learning_rate": 0.0003108674118503793, + "loss": 0.3496, + "step": 556 + }, + { + "epoch": 1.07, + "learning_rate": 0.0003105082356285361, + "loss": 0.4108, + "step": 557 + }, + { + "epoch": 1.08, + "learning_rate": 0.0003101485456291486, + "loss": 0.4877, + "step": 558 + }, + { + "epoch": 1.08, + "learning_rate": 0.00030978834352449614, + "loss": 0.3696, + "step": 559 + }, + { + "epoch": 1.08, + "learning_rate": 0.00030942763098923913, + "loss": 0.5138, + "step": 560 + }, + { + "epoch": 1.08, + "learning_rate": 0.00030906640970041084, + "loss": 0.5961, + "step": 561 + }, + { + "epoch": 1.08, + "learning_rate": 0.0003087046813374099, + "loss": 0.3824, + "step": 562 + }, + { + "epoch": 1.08, + "learning_rate": 0.00030834244758199276, + "loss": 0.4925, + "step": 563 + }, + { + "epoch": 1.09, + "learning_rate": 0.000307979710118265, + "loss": 0.4511, + "step": 564 + }, + { + "epoch": 1.09, + "learning_rate": 0.00030761647063267457, + "loss": 0.4306, + "step": 565 + }, + { + "epoch": 1.09, + "learning_rate": 0.0003072527308140031, + "loss": 0.468, + "step": 566 + }, + { + "epoch": 1.09, + "learning_rate": 0.00030688849235335856, + "loss": 0.4842, + "step": 567 + }, + { + "epoch": 1.09, + "learning_rate": 0.0003065237569441671, + "loss": 0.4332, + "step": 568 + }, + { + "epoch": 1.1, + "learning_rate": 0.00030615852628216537, + "loss": 0.4637, + "step": 569 + }, + { + "epoch": 1.1, + "learning_rate": 0.0003057928020653925, + "loss": 0.6193, + "step": 570 + }, + { + "epoch": 1.1, + "learning_rate": 0.0003054265859941824, + "loss": 0.5033, + "step": 571 + }, + { + "epoch": 1.1, + "learning_rate": 0.00030505987977115555, + "loss": 0.4185, + "step": 572 + }, + { + "epoch": 1.1, + "learning_rate": 0.0003046926851012114, + "loss": 0.4211, + "step": 573 + }, + { + "epoch": 1.11, + "learning_rate": 0.0003043250036915201, + "loss": 0.5089, + "step": 574 + }, + { + "epoch": 1.11, + "learning_rate": 0.00030395683725151505, + "loss": 0.517, + "step": 575 + }, + { + "epoch": 1.11, + "learning_rate": 0.0003035881874928845, + "loss": 0.492, + "step": 576 + }, + { + "epoch": 1.11, + "learning_rate": 0.0003032190561295636, + "loss": 0.4535, + "step": 577 + }, + { + "epoch": 1.11, + "learning_rate": 0.0003028494448777269, + "loss": 0.3947, + "step": 578 + }, + { + "epoch": 1.12, + "learning_rate": 0.00030247935545577986, + "loss": 0.3125, + "step": 579 + }, + { + "epoch": 1.12, + "learning_rate": 0.0003021087895843511, + "loss": 0.3882, + "step": 580 + }, + { + "epoch": 1.12, + "learning_rate": 0.0003017377489862845, + "loss": 0.4802, + "step": 581 + }, + { + "epoch": 1.12, + "learning_rate": 0.00030136623538663083, + "loss": 0.4652, + "step": 582 + }, + { + "epoch": 1.12, + "learning_rate": 0.00030099425051263994, + "loss": 0.3816, + "step": 583 + }, + { + "epoch": 1.13, + "learning_rate": 0.0003006217960937529, + "loss": 0.4583, + "step": 584 + }, + { + "epoch": 1.13, + "learning_rate": 0.00030024887386159385, + "loss": 0.4568, + "step": 585 + }, + { + "epoch": 1.13, + "learning_rate": 0.00029987548554996174, + "loss": 0.3908, + "step": 586 + }, + { + "epoch": 1.13, + "learning_rate": 0.0002995016328948225, + "loss": 0.4235, + "step": 587 + }, + { + "epoch": 1.13, + "learning_rate": 0.00029912731763430075, + "loss": 0.4138, + "step": 588 + }, + { + "epoch": 1.13, + "learning_rate": 0.00029875254150867216, + "loss": 0.5838, + "step": 589 + }, + { + "epoch": 1.14, + "learning_rate": 0.0002983773062603548, + "loss": 0.462, + "step": 590 + }, + { + "epoch": 1.14, + "learning_rate": 0.00029800161363390145, + "loss": 0.4632, + "step": 591 + }, + { + "epoch": 1.14, + "learning_rate": 0.00029762546537599125, + "loss": 0.5898, + "step": 592 + }, + { + "epoch": 1.14, + "learning_rate": 0.0002972488632354218, + "loss": 0.4742, + "step": 593 + }, + { + "epoch": 1.14, + "learning_rate": 0.00029687180896310065, + "loss": 0.4579, + "step": 594 + }, + { + "epoch": 1.15, + "learning_rate": 0.0002964943043120378, + "loss": 0.5514, + "step": 595 + }, + { + "epoch": 1.15, + "learning_rate": 0.00029611635103733675, + "loss": 0.4304, + "step": 596 + }, + { + "epoch": 1.15, + "learning_rate": 0.0002957379508961871, + "loss": 0.4383, + "step": 597 + }, + { + "epoch": 1.15, + "learning_rate": 0.00029535910564785584, + "loss": 0.5327, + "step": 598 + }, + { + "epoch": 1.15, + "learning_rate": 0.00029497981705367933, + "loss": 0.4781, + "step": 599 + }, + { + "epoch": 1.16, + "learning_rate": 0.00029460008687705525, + "loss": 0.4178, + "step": 600 + }, + { + "epoch": 1.16, + "learning_rate": 0.0002942199168834342, + "loss": 0.3987, + "step": 601 + }, + { + "epoch": 1.16, + "learning_rate": 0.00029383930884031183, + "loss": 0.3861, + "step": 602 + }, + { + "epoch": 1.16, + "learning_rate": 0.00029345826451722005, + "loss": 0.5322, + "step": 603 + }, + { + "epoch": 1.16, + "learning_rate": 0.00029307678568571936, + "loss": 0.3997, + "step": 604 + }, + { + "epoch": 1.17, + "learning_rate": 0.0002926948741193903, + "loss": 0.4121, + "step": 605 + }, + { + "epoch": 1.17, + "learning_rate": 0.00029231253159382514, + "loss": 0.4931, + "step": 606 + }, + { + "epoch": 1.17, + "learning_rate": 0.00029192975988662017, + "loss": 0.4626, + "step": 607 + }, + { + "epoch": 1.17, + "learning_rate": 0.00029154656077736666, + "loss": 0.4441, + "step": 608 + }, + { + "epoch": 1.17, + "learning_rate": 0.0002911629360476432, + "loss": 0.3863, + "step": 609 + }, + { + "epoch": 1.18, + "learning_rate": 0.00029077888748100703, + "loss": 0.36, + "step": 610 + }, + { + "epoch": 1.18, + "learning_rate": 0.00029039441686298594, + "loss": 0.4246, + "step": 611 + }, + { + "epoch": 1.18, + "learning_rate": 0.0002900095259810702, + "loss": 0.4916, + "step": 612 + }, + { + "epoch": 1.18, + "learning_rate": 0.00028962421662470346, + "loss": 0.4896, + "step": 613 + }, + { + "epoch": 1.18, + "learning_rate": 0.00028923849058527535, + "loss": 0.4237, + "step": 614 + }, + { + "epoch": 1.18, + "learning_rate": 0.00028885234965611274, + "loss": 0.5727, + "step": 615 + }, + { + "epoch": 1.19, + "learning_rate": 0.00028846579563247116, + "loss": 0.5681, + "step": 616 + }, + { + "epoch": 1.19, + "learning_rate": 0.0002880788303115269, + "loss": 0.4383, + "step": 617 + }, + { + "epoch": 1.19, + "learning_rate": 0.00028769145549236845, + "loss": 0.4962, + "step": 618 + }, + { + "epoch": 1.19, + "learning_rate": 0.0002873036729759881, + "loss": 0.5472, + "step": 619 + }, + { + "epoch": 1.19, + "learning_rate": 0.0002869154845652738, + "loss": 0.5431, + "step": 620 + }, + { + "epoch": 1.2, + "learning_rate": 0.0002865268920650003, + "loss": 0.4152, + "step": 621 + }, + { + "epoch": 1.2, + "learning_rate": 0.0002861378972818211, + "loss": 0.3922, + "step": 622 + }, + { + "epoch": 1.2, + "learning_rate": 0.0002857485020242602, + "loss": 0.5129, + "step": 623 + }, + { + "epoch": 1.2, + "learning_rate": 0.0002853587081027034, + "loss": 0.4328, + "step": 624 + }, + { + "epoch": 1.2, + "learning_rate": 0.00028496851732938997, + "loss": 0.4431, + "step": 625 + }, + { + "epoch": 1.21, + "learning_rate": 0.0002845779315184042, + "loss": 0.4968, + "step": 626 + }, + { + "epoch": 1.21, + "learning_rate": 0.000284186952485667, + "loss": 0.5301, + "step": 627 + }, + { + "epoch": 1.21, + "learning_rate": 0.0002837955820489276, + "loss": 0.4332, + "step": 628 + }, + { + "epoch": 1.21, + "learning_rate": 0.0002834038220277546, + "loss": 0.4245, + "step": 629 + }, + { + "epoch": 1.21, + "learning_rate": 0.00028301167424352836, + "loss": 0.5057, + "step": 630 + }, + { + "epoch": 1.22, + "learning_rate": 0.00028261914051943166, + "loss": 0.4623, + "step": 631 + }, + { + "epoch": 1.22, + "learning_rate": 0.00028222622268044174, + "loss": 0.5452, + "step": 632 + }, + { + "epoch": 1.22, + "learning_rate": 0.00028183292255332164, + "loss": 0.5238, + "step": 633 + }, + { + "epoch": 1.22, + "learning_rate": 0.00028143924196661176, + "loss": 0.3966, + "step": 634 + }, + { + "epoch": 1.22, + "learning_rate": 0.0002810451827506214, + "loss": 0.35, + "step": 635 + }, + { + "epoch": 1.23, + "learning_rate": 0.00028065074673742007, + "loss": 0.4325, + "step": 636 + }, + { + "epoch": 1.23, + "learning_rate": 0.0002802559357608292, + "loss": 0.4854, + "step": 637 + }, + { + "epoch": 1.23, + "learning_rate": 0.00027986075165641343, + "loss": 0.4254, + "step": 638 + }, + { + "epoch": 1.23, + "learning_rate": 0.00027946519626147225, + "loss": 0.4614, + "step": 639 + }, + { + "epoch": 1.23, + "learning_rate": 0.00027906927141503125, + "loss": 0.3798, + "step": 640 + }, + { + "epoch": 1.23, + "learning_rate": 0.00027867297895783373, + "loss": 0.4742, + "step": 641 + }, + { + "epoch": 1.24, + "learning_rate": 0.0002782763207323322, + "loss": 0.4007, + "step": 642 + }, + { + "epoch": 1.24, + "learning_rate": 0.0002778792985826795, + "loss": 0.4383, + "step": 643 + }, + { + "epoch": 1.24, + "learning_rate": 0.0002774819143547206, + "loss": 0.4298, + "step": 644 + }, + { + "epoch": 1.24, + "learning_rate": 0.00027708416989598387, + "loss": 0.5178, + "step": 645 + }, + { + "epoch": 1.24, + "learning_rate": 0.0002766860670556722, + "loss": 0.3434, + "step": 646 + }, + { + "epoch": 1.25, + "learning_rate": 0.0002762876076846551, + "loss": 0.3862, + "step": 647 + }, + { + "epoch": 1.25, + "learning_rate": 0.00027588879363545934, + "loss": 0.4459, + "step": 648 + }, + { + "epoch": 1.25, + "learning_rate": 0.0002754896267622608, + "loss": 0.3934, + "step": 649 + }, + { + "epoch": 1.25, + "learning_rate": 0.00027509010892087565, + "loss": 0.4349, + "step": 650 + }, + { + "epoch": 1.25, + "learning_rate": 0.000274690241968752, + "loss": 0.4178, + "step": 651 + }, + { + "epoch": 1.26, + "learning_rate": 0.0002742900277649607, + "loss": 0.4151, + "step": 652 + }, + { + "epoch": 1.26, + "learning_rate": 0.0002738894681701874, + "loss": 0.3888, + "step": 653 + }, + { + "epoch": 1.26, + "learning_rate": 0.00027348856504672323, + "loss": 0.4214, + "step": 654 + }, + { + "epoch": 1.26, + "learning_rate": 0.0002730873202584567, + "loss": 0.519, + "step": 655 + }, + { + "epoch": 1.26, + "learning_rate": 0.00027268573567086477, + "loss": 0.5463, + "step": 656 + }, + { + "epoch": 1.27, + "learning_rate": 0.00027228381315100417, + "loss": 0.3367, + "step": 657 + }, + { + "epoch": 1.27, + "learning_rate": 0.00027188155456750256, + "loss": 0.4629, + "step": 658 + }, + { + "epoch": 1.27, + "learning_rate": 0.00027147896179055043, + "loss": 0.4456, + "step": 659 + }, + { + "epoch": 1.27, + "learning_rate": 0.0002710760366918917, + "loss": 0.4348, + "step": 660 + }, + { + "epoch": 1.27, + "learning_rate": 0.0002706727811448153, + "loss": 0.4505, + "step": 661 + }, + { + "epoch": 1.28, + "learning_rate": 0.0002702691970241468, + "loss": 0.5028, + "step": 662 + }, + { + "epoch": 1.28, + "learning_rate": 0.00026986528620623904, + "loss": 0.5257, + "step": 663 + }, + { + "epoch": 1.28, + "learning_rate": 0.00026946105056896403, + "loss": 0.4977, + "step": 664 + }, + { + "epoch": 1.28, + "learning_rate": 0.00026905649199170377, + "loss": 0.421, + "step": 665 + }, + { + "epoch": 1.28, + "learning_rate": 0.0002686516123553417, + "loss": 0.4931, + "step": 666 + }, + { + "epoch": 1.28, + "learning_rate": 0.00026824641354225397, + "loss": 0.5818, + "step": 667 + }, + { + "epoch": 1.29, + "learning_rate": 0.0002678408974363005, + "loss": 0.4211, + "step": 668 + }, + { + "epoch": 1.29, + "learning_rate": 0.00026743506592281674, + "loss": 0.5182, + "step": 669 + }, + { + "epoch": 1.29, + "learning_rate": 0.00026702892088860413, + "loss": 0.5591, + "step": 670 + }, + { + "epoch": 1.29, + "learning_rate": 0.0002666224642219221, + "loss": 0.5363, + "step": 671 + }, + { + "epoch": 1.29, + "learning_rate": 0.0002662156978124786, + "loss": 0.5866, + "step": 672 + }, + { + "epoch": 1.3, + "learning_rate": 0.0002658086235514218, + "loss": 0.422, + "step": 673 + }, + { + "epoch": 1.3, + "learning_rate": 0.0002654012433313312, + "loss": 0.5375, + "step": 674 + }, + { + "epoch": 1.3, + "learning_rate": 0.0002649935590462087, + "loss": 0.4752, + "step": 675 + }, + { + "epoch": 1.3, + "learning_rate": 0.00026458557259146986, + "loss": 0.4271, + "step": 676 + }, + { + "epoch": 1.3, + "learning_rate": 0.0002641772858639351, + "loss": 0.4843, + "step": 677 + }, + { + "epoch": 1.31, + "learning_rate": 0.00026376870076182086, + "loss": 0.4827, + "step": 678 + }, + { + "epoch": 1.31, + "learning_rate": 0.00026335981918473086, + "loss": 0.47, + "step": 679 + }, + { + "epoch": 1.31, + "learning_rate": 0.0002629506430336472, + "loss": 0.368, + "step": 680 + }, + { + "epoch": 1.31, + "learning_rate": 0.00026254117421092133, + "loss": 0.481, + "step": 681 + }, + { + "epoch": 1.31, + "learning_rate": 0.0002621314146202656, + "loss": 0.4153, + "step": 682 + }, + { + "epoch": 1.32, + "learning_rate": 0.0002617213661667443, + "loss": 0.4397, + "step": 683 + }, + { + "epoch": 1.32, + "learning_rate": 0.0002613110307567643, + "loss": 0.4052, + "step": 684 + }, + { + "epoch": 1.32, + "learning_rate": 0.00026090041029806695, + "loss": 0.4652, + "step": 685 + }, + { + "epoch": 1.32, + "learning_rate": 0.00026048950669971884, + "loss": 0.3826, + "step": 686 + }, + { + "epoch": 1.32, + "learning_rate": 0.00026007832187210277, + "loss": 0.5639, + "step": 687 + }, + { + "epoch": 1.33, + "learning_rate": 0.00025966685772690906, + "loss": 0.3917, + "step": 688 + }, + { + "epoch": 1.33, + "learning_rate": 0.00025925511617712685, + "loss": 0.5248, + "step": 689 + }, + { + "epoch": 1.33, + "learning_rate": 0.0002588430991370347, + "loss": 0.3796, + "step": 690 + }, + { + "epoch": 1.33, + "learning_rate": 0.0002584308085221922, + "loss": 0.4391, + "step": 691 + }, + { + "epoch": 1.33, + "learning_rate": 0.00025801824624943084, + "loss": 0.4514, + "step": 692 + }, + { + "epoch": 1.34, + "learning_rate": 0.00025760541423684496, + "loss": 0.5046, + "step": 693 + }, + { + "epoch": 1.34, + "learning_rate": 0.0002571923144037831, + "loss": 0.4578, + "step": 694 + }, + { + "epoch": 1.34, + "learning_rate": 0.0002567789486708389, + "loss": 0.4681, + "step": 695 + }, + { + "epoch": 1.34, + "learning_rate": 0.00025636531895984236, + "loss": 0.4501, + "step": 696 + }, + { + "epoch": 1.34, + "learning_rate": 0.0002559514271938506, + "loss": 0.4411, + "step": 697 + }, + { + "epoch": 1.34, + "learning_rate": 0.00025553727529713916, + "loss": 0.401, + "step": 698 + }, + { + "epoch": 1.35, + "learning_rate": 0.00025512286519519293, + "loss": 0.4911, + "step": 699 + }, + { + "epoch": 1.35, + "learning_rate": 0.0002547081988146974, + "loss": 0.3754, + "step": 700 + } + ], + "logging_steps": 1, + "max_steps": 1557, + "num_train_epochs": 3, + "save_steps": 50, + "total_flos": 9.390244643843604e+17, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-700/training_args.bin b/checkpoint-700/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..cbdf20491848d40e9a89bca19c6229b4b2b55e5d --- /dev/null +++ b/checkpoint-700/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:07819caee47f45203545f962678d52b5954ff1fd4afe1d5152fad48004402099 +size 4155 diff --git a/checkpoint-750/README.md b/checkpoint-750/README.md new file mode 100644 index 0000000000000000000000000000000000000000..b32efe7366f05d1d90816d2ad9e4b06ccca46bea --- /dev/null +++ b/checkpoint-750/README.md @@ -0,0 +1,219 @@ +--- +library_name: peft +base_model: mistralai/Mistral-7B-v0.1 +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] + + +## Training procedure + + +The following `bitsandbytes` quantization config was used during training: +- quant_method: bitsandbytes +- load_in_8bit: False +- load_in_4bit: True +- llm_int8_threshold: 6.0 +- llm_int8_skip_modules: None +- llm_int8_enable_fp32_cpu_offload: False +- llm_int8_has_fp16_weight: False +- bnb_4bit_quant_type: nf4 +- bnb_4bit_use_double_quant: True +- bnb_4bit_compute_dtype: float16 + +### Framework versions + + +- PEFT 0.6.0.dev0 diff --git a/checkpoint-750/adapter_config.json b/checkpoint-750/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..4e108f2da037ef6250457c67a4bedd308d97303c --- /dev/null +++ b/checkpoint-750/adapter_config.json @@ -0,0 +1,24 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "mistralai/Mistral-7B-v0.1", + "bias": "none", + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layers_pattern": null, + "layers_to_transform": null, + "lora_alpha": 16, + "lora_dropout": 0.05, + "modules_to_save": null, + "peft_type": "LORA", + "r": 16, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "gate_proj", + "down_proj", + "up_proj" + ], + "task_type": "CAUSAL_LM" +} \ No newline at end of file diff --git a/checkpoint-750/adapter_model.bin b/checkpoint-750/adapter_model.bin new file mode 100644 index 0000000000000000000000000000000000000000..6265a5c9e72d2291284a737f0f39272f8d16567b --- /dev/null +++ b/checkpoint-750/adapter_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5997c00f5e8f6eccdb56e961f1f52738da6811b9e1f8f7db68cc06d230fa63e6 +size 113314765 diff --git a/checkpoint-750/optimizer.pt b/checkpoint-750/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..0eb309fead5ea47a366d9047225f99eb279814eb --- /dev/null +++ b/checkpoint-750/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d035f8f714d8f59687635c261f0c44aa27f208c4f133ed451e6712f921d9a7a2 +size 226653957 diff --git a/checkpoint-750/rng_state.pth b/checkpoint-750/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..b84d89f04526af3df117562115ac4f6cf91ccc53 --- /dev/null +++ b/checkpoint-750/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c5e71c190b39ebb7d181ccf3ca246501e0db9c691f3efccf8fad4d44e71dde25 +size 14575 diff --git a/checkpoint-750/scheduler.pt b/checkpoint-750/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..d2cc3cd20e5bad09df20e8bfa79447b0e0afad6f --- /dev/null +++ b/checkpoint-750/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:04a877afe6d21b07682330f27e66aabc1b0aa1dfc72a9aa997eb4a4f2512ca27 +size 627 diff --git a/checkpoint-750/trainer_state.json b/checkpoint-750/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..3cb1819cc48fbb76128230079b75c48ee5a39307 --- /dev/null +++ b/checkpoint-750/trainer_state.json @@ -0,0 +1,4519 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 1.4448188711036225, + "eval_steps": 500, + "global_step": 750, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0, + "learning_rate": 4.000000000000001e-06, + "loss": 0.6869, + "step": 1 + }, + { + "epoch": 0.0, + "learning_rate": 8.000000000000001e-06, + "loss": 0.8396, + "step": 2 + }, + { + "epoch": 0.01, + "learning_rate": 1.2e-05, + "loss": 0.7489, + "step": 3 + }, + { + "epoch": 0.01, + "learning_rate": 1.6000000000000003e-05, + "loss": 0.7252, + "step": 4 + }, + { + "epoch": 0.01, + "learning_rate": 2e-05, + "loss": 0.6548, + "step": 5 + }, + { + "epoch": 0.01, + "learning_rate": 2.4e-05, + "loss": 0.8022, + "step": 6 + }, + { + "epoch": 0.01, + "learning_rate": 2.8000000000000003e-05, + "loss": 0.6524, + "step": 7 + }, + { + "epoch": 0.02, + "learning_rate": 3.2000000000000005e-05, + "loss": 0.6981, + "step": 8 + }, + { + "epoch": 0.02, + "learning_rate": 3.6e-05, + "loss": 0.7488, + "step": 9 + }, + { + "epoch": 0.02, + "learning_rate": 4e-05, + "loss": 0.6368, + "step": 10 + }, + { + "epoch": 0.02, + "learning_rate": 4.4000000000000006e-05, + "loss": 0.6891, + "step": 11 + }, + { + "epoch": 0.02, + "learning_rate": 4.8e-05, + "loss": 0.7968, + "step": 12 + }, + { + "epoch": 0.03, + "learning_rate": 5.2000000000000004e-05, + "loss": 0.6912, + "step": 13 + }, + { + "epoch": 0.03, + "learning_rate": 5.6000000000000006e-05, + "loss": 0.8452, + "step": 14 + }, + { + "epoch": 0.03, + "learning_rate": 6e-05, + "loss": 0.6989, + "step": 15 + }, + { + "epoch": 0.03, + "learning_rate": 6.400000000000001e-05, + "loss": 0.6685, + "step": 16 + }, + { + "epoch": 0.03, + "learning_rate": 6.800000000000001e-05, + "loss": 0.5469, + "step": 17 + }, + { + "epoch": 0.03, + "learning_rate": 7.2e-05, + "loss": 0.7915, + "step": 18 + }, + { + "epoch": 0.04, + "learning_rate": 7.6e-05, + "loss": 0.7744, + "step": 19 + }, + { + "epoch": 0.04, + "learning_rate": 8e-05, + "loss": 0.6804, + "step": 20 + }, + { + "epoch": 0.04, + "learning_rate": 8.4e-05, + "loss": 0.7796, + "step": 21 + }, + { + "epoch": 0.04, + "learning_rate": 8.800000000000001e-05, + "loss": 0.706, + "step": 22 + }, + { + "epoch": 0.04, + "learning_rate": 9.200000000000001e-05, + "loss": 0.6798, + "step": 23 + }, + { + "epoch": 0.05, + "learning_rate": 9.6e-05, + "loss": 0.6333, + "step": 24 + }, + { + "epoch": 0.05, + "learning_rate": 0.0001, + "loss": 0.6012, + "step": 25 + }, + { + "epoch": 0.05, + "learning_rate": 0.00010400000000000001, + "loss": 0.52, + "step": 26 + }, + { + "epoch": 0.05, + "learning_rate": 0.00010800000000000001, + "loss": 0.6583, + "step": 27 + }, + { + "epoch": 0.05, + "learning_rate": 0.00011200000000000001, + "loss": 0.7354, + "step": 28 + }, + { + "epoch": 0.06, + "learning_rate": 0.000116, + "loss": 0.6296, + "step": 29 + }, + { + "epoch": 0.06, + "learning_rate": 0.00012, + "loss": 0.6352, + "step": 30 + }, + { + "epoch": 0.06, + "learning_rate": 0.000124, + "loss": 0.6007, + "step": 31 + }, + { + "epoch": 0.06, + "learning_rate": 0.00012800000000000002, + "loss": 0.5659, + "step": 32 + }, + { + "epoch": 0.06, + "learning_rate": 0.000132, + "loss": 0.5138, + "step": 33 + }, + { + "epoch": 0.07, + "learning_rate": 0.00013600000000000003, + "loss": 0.6639, + "step": 34 + }, + { + "epoch": 0.07, + "learning_rate": 0.00014, + "loss": 0.5934, + "step": 35 + }, + { + "epoch": 0.07, + "learning_rate": 0.000144, + "loss": 0.5233, + "step": 36 + }, + { + "epoch": 0.07, + "learning_rate": 0.000148, + "loss": 0.5307, + "step": 37 + }, + { + "epoch": 0.07, + "learning_rate": 0.000152, + "loss": 0.5928, + "step": 38 + }, + { + "epoch": 0.08, + "learning_rate": 0.00015600000000000002, + "loss": 0.5908, + "step": 39 + }, + { + "epoch": 0.08, + "learning_rate": 0.00016, + "loss": 0.6366, + "step": 40 + }, + { + "epoch": 0.08, + "learning_rate": 0.000164, + "loss": 0.5972, + "step": 41 + }, + { + "epoch": 0.08, + "learning_rate": 0.000168, + "loss": 0.4825, + "step": 42 + }, + { + "epoch": 0.08, + "learning_rate": 0.000172, + "loss": 0.6783, + "step": 43 + }, + { + "epoch": 0.08, + "learning_rate": 0.00017600000000000002, + "loss": 0.6082, + "step": 44 + }, + { + "epoch": 0.09, + "learning_rate": 0.00018, + "loss": 0.7633, + "step": 45 + }, + { + "epoch": 0.09, + "learning_rate": 0.00018400000000000003, + "loss": 0.5988, + "step": 46 + }, + { + "epoch": 0.09, + "learning_rate": 0.000188, + "loss": 0.6658, + "step": 47 + }, + { + "epoch": 0.09, + "learning_rate": 0.000192, + "loss": 0.5945, + "step": 48 + }, + { + "epoch": 0.09, + "learning_rate": 0.000196, + "loss": 0.5984, + "step": 49 + }, + { + "epoch": 0.1, + "learning_rate": 0.0002, + "loss": 0.6778, + "step": 50 + }, + { + "epoch": 0.1, + "learning_rate": 0.00020400000000000003, + "loss": 0.6057, + "step": 51 + }, + { + "epoch": 0.1, + "learning_rate": 0.00020800000000000001, + "loss": 0.601, + "step": 52 + }, + { + "epoch": 0.1, + "learning_rate": 0.00021200000000000003, + "loss": 0.5566, + "step": 53 + }, + { + "epoch": 0.1, + "learning_rate": 0.00021600000000000002, + "loss": 0.5911, + "step": 54 + }, + { + "epoch": 0.11, + "learning_rate": 0.00022000000000000003, + "loss": 0.7636, + "step": 55 + }, + { + "epoch": 0.11, + "learning_rate": 0.00022400000000000002, + "loss": 0.5537, + "step": 56 + }, + { + "epoch": 0.11, + "learning_rate": 0.00022799999999999999, + "loss": 0.6037, + "step": 57 + }, + { + "epoch": 0.11, + "learning_rate": 0.000232, + "loss": 0.6474, + "step": 58 + }, + { + "epoch": 0.11, + "learning_rate": 0.000236, + "loss": 0.6483, + "step": 59 + }, + { + "epoch": 0.12, + "learning_rate": 0.00024, + "loss": 0.5021, + "step": 60 + }, + { + "epoch": 0.12, + "learning_rate": 0.000244, + "loss": 0.5347, + "step": 61 + }, + { + "epoch": 0.12, + "learning_rate": 0.000248, + "loss": 0.5791, + "step": 62 + }, + { + "epoch": 0.12, + "learning_rate": 0.000252, + "loss": 0.5407, + "step": 63 + }, + { + "epoch": 0.12, + "learning_rate": 0.00025600000000000004, + "loss": 0.5298, + "step": 64 + }, + { + "epoch": 0.13, + "learning_rate": 0.00026000000000000003, + "loss": 0.5685, + "step": 65 + }, + { + "epoch": 0.13, + "learning_rate": 0.000264, + "loss": 0.5108, + "step": 66 + }, + { + "epoch": 0.13, + "learning_rate": 0.000268, + "loss": 0.526, + "step": 67 + }, + { + "epoch": 0.13, + "learning_rate": 0.00027200000000000005, + "loss": 0.6843, + "step": 68 + }, + { + "epoch": 0.13, + "learning_rate": 0.000276, + "loss": 0.6608, + "step": 69 + }, + { + "epoch": 0.13, + "learning_rate": 0.00028, + "loss": 0.5866, + "step": 70 + }, + { + "epoch": 0.14, + "learning_rate": 0.000284, + "loss": 0.6422, + "step": 71 + }, + { + "epoch": 0.14, + "learning_rate": 0.000288, + "loss": 0.449, + "step": 72 + }, + { + "epoch": 0.14, + "learning_rate": 0.000292, + "loss": 0.5319, + "step": 73 + }, + { + "epoch": 0.14, + "learning_rate": 0.000296, + "loss": 0.5977, + "step": 74 + }, + { + "epoch": 0.14, + "learning_rate": 0.00030000000000000003, + "loss": 0.5805, + "step": 75 + }, + { + "epoch": 0.15, + "learning_rate": 0.000304, + "loss": 0.5209, + "step": 76 + }, + { + "epoch": 0.15, + "learning_rate": 0.000308, + "loss": 0.6098, + "step": 77 + }, + { + "epoch": 0.15, + "learning_rate": 0.00031200000000000005, + "loss": 0.4665, + "step": 78 + }, + { + "epoch": 0.15, + "learning_rate": 0.00031600000000000004, + "loss": 0.6882, + "step": 79 + }, + { + "epoch": 0.15, + "learning_rate": 0.00032, + "loss": 0.5427, + "step": 80 + }, + { + "epoch": 0.16, + "learning_rate": 0.000324, + "loss": 0.5345, + "step": 81 + }, + { + "epoch": 0.16, + "learning_rate": 0.000328, + "loss": 0.663, + "step": 82 + }, + { + "epoch": 0.16, + "learning_rate": 0.000332, + "loss": 0.5393, + "step": 83 + }, + { + "epoch": 0.16, + "learning_rate": 0.000336, + "loss": 0.5711, + "step": 84 + }, + { + "epoch": 0.16, + "learning_rate": 0.00034, + "loss": 0.5261, + "step": 85 + }, + { + "epoch": 0.17, + "learning_rate": 0.000344, + "loss": 0.5775, + "step": 86 + }, + { + "epoch": 0.17, + "learning_rate": 0.000348, + "loss": 0.6329, + "step": 87 + }, + { + "epoch": 0.17, + "learning_rate": 0.00035200000000000005, + "loss": 0.4425, + "step": 88 + }, + { + "epoch": 0.17, + "learning_rate": 0.00035600000000000003, + "loss": 0.6837, + "step": 89 + }, + { + "epoch": 0.17, + "learning_rate": 0.00036, + "loss": 0.615, + "step": 90 + }, + { + "epoch": 0.18, + "learning_rate": 0.000364, + "loss": 0.5615, + "step": 91 + }, + { + "epoch": 0.18, + "learning_rate": 0.00036800000000000005, + "loss": 0.5434, + "step": 92 + }, + { + "epoch": 0.18, + "learning_rate": 0.00037200000000000004, + "loss": 0.5864, + "step": 93 + }, + { + "epoch": 0.18, + "learning_rate": 0.000376, + "loss": 0.5583, + "step": 94 + }, + { + "epoch": 0.18, + "learning_rate": 0.00038, + "loss": 0.5299, + "step": 95 + }, + { + "epoch": 0.18, + "learning_rate": 0.000384, + "loss": 0.532, + "step": 96 + }, + { + "epoch": 0.19, + "learning_rate": 0.000388, + "loss": 0.5227, + "step": 97 + }, + { + "epoch": 0.19, + "learning_rate": 0.000392, + "loss": 0.5275, + "step": 98 + }, + { + "epoch": 0.19, + "learning_rate": 0.00039600000000000003, + "loss": 0.4541, + "step": 99 + }, + { + "epoch": 0.19, + "learning_rate": 0.0004, + "loss": 0.6485, + "step": 100 + }, + { + "epoch": 0.19, + "learning_rate": 0.0003999995350775973, + "loss": 0.5438, + "step": 101 + }, + { + "epoch": 0.2, + "learning_rate": 0.00039999814031255063, + "loss": 0.5997, + "step": 102 + }, + { + "epoch": 0.2, + "learning_rate": 0.00039999581571134455, + "loss": 0.5322, + "step": 103 + }, + { + "epoch": 0.2, + "learning_rate": 0.0003999925612847867, + "loss": 0.484, + "step": 104 + }, + { + "epoch": 0.2, + "learning_rate": 0.00039998837704800766, + "loss": 0.5961, + "step": 105 + }, + { + "epoch": 0.2, + "learning_rate": 0.00039998326302046085, + "loss": 0.7405, + "step": 106 + }, + { + "epoch": 0.21, + "learning_rate": 0.00039997721922592255, + "loss": 0.5802, + "step": 107 + }, + { + "epoch": 0.21, + "learning_rate": 0.00039997024569249167, + "loss": 0.769, + "step": 108 + }, + { + "epoch": 0.21, + "learning_rate": 0.0003999623424525898, + "loss": 0.5598, + "step": 109 + }, + { + "epoch": 0.21, + "learning_rate": 0.0003999535095429608, + "loss": 0.6143, + "step": 110 + }, + { + "epoch": 0.21, + "learning_rate": 0.00039994374700467095, + "loss": 0.5766, + "step": 111 + }, + { + "epoch": 0.22, + "learning_rate": 0.00039993305488310836, + "loss": 0.7695, + "step": 112 + }, + { + "epoch": 0.22, + "learning_rate": 0.0003999214332279831, + "loss": 0.7153, + "step": 113 + }, + { + "epoch": 0.22, + "learning_rate": 0.0003999088820933269, + "loss": 0.5835, + "step": 114 + }, + { + "epoch": 0.22, + "learning_rate": 0.00039989540153749286, + "loss": 0.6634, + "step": 115 + }, + { + "epoch": 0.22, + "learning_rate": 0.000399880991623155, + "loss": 0.6069, + "step": 116 + }, + { + "epoch": 0.23, + "learning_rate": 0.0003998656524173082, + "loss": 0.7224, + "step": 117 + }, + { + "epoch": 0.23, + "learning_rate": 0.000399849383991268, + "loss": 0.5884, + "step": 118 + }, + { + "epoch": 0.23, + "learning_rate": 0.0003998321864206699, + "loss": 0.5122, + "step": 119 + }, + { + "epoch": 0.23, + "learning_rate": 0.00039981405978546924, + "loss": 0.6453, + "step": 120 + }, + { + "epoch": 0.23, + "learning_rate": 0.0003997950041699408, + "loss": 0.4665, + "step": 121 + }, + { + "epoch": 0.23, + "learning_rate": 0.0003997750196626785, + "loss": 0.5428, + "step": 122 + }, + { + "epoch": 0.24, + "learning_rate": 0.00039975410635659464, + "loss": 0.4365, + "step": 123 + }, + { + "epoch": 0.24, + "learning_rate": 0.00039973226434891995, + "loss": 0.5978, + "step": 124 + }, + { + "epoch": 0.24, + "learning_rate": 0.00039970949374120286, + "loss": 0.7729, + "step": 125 + }, + { + "epoch": 0.24, + "learning_rate": 0.000399685794639309, + "loss": 0.6212, + "step": 126 + }, + { + "epoch": 0.24, + "learning_rate": 0.00039966116715342066, + "loss": 0.5426, + "step": 127 + }, + { + "epoch": 0.25, + "learning_rate": 0.00039963561139803676, + "loss": 0.5782, + "step": 128 + }, + { + "epoch": 0.25, + "learning_rate": 0.0003996091274919716, + "loss": 0.6701, + "step": 129 + }, + { + "epoch": 0.25, + "learning_rate": 0.0003995817155583548, + "loss": 0.6314, + "step": 130 + }, + { + "epoch": 0.25, + "learning_rate": 0.0003995533757246307, + "loss": 0.6662, + "step": 131 + }, + { + "epoch": 0.25, + "learning_rate": 0.0003995241081225573, + "loss": 0.5192, + "step": 132 + }, + { + "epoch": 0.26, + "learning_rate": 0.0003994939128882065, + "loss": 0.5591, + "step": 133 + }, + { + "epoch": 0.26, + "learning_rate": 0.0003994627901619625, + "loss": 0.5809, + "step": 134 + }, + { + "epoch": 0.26, + "learning_rate": 0.0003994307400885219, + "loss": 0.4871, + "step": 135 + }, + { + "epoch": 0.26, + "learning_rate": 0.0003993977628168928, + "loss": 0.6666, + "step": 136 + }, + { + "epoch": 0.26, + "learning_rate": 0.0003993638585003938, + "loss": 0.6469, + "step": 137 + }, + { + "epoch": 0.27, + "learning_rate": 0.00039932902729665357, + "loss": 0.5727, + "step": 138 + }, + { + "epoch": 0.27, + "learning_rate": 0.00039929326936761036, + "loss": 0.6715, + "step": 139 + }, + { + "epoch": 0.27, + "learning_rate": 0.00039925658487951067, + "loss": 0.5686, + "step": 140 + }, + { + "epoch": 0.27, + "learning_rate": 0.00039921897400290894, + "loss": 0.501, + "step": 141 + }, + { + "epoch": 0.27, + "learning_rate": 0.00039918043691266665, + "loss": 0.5795, + "step": 142 + }, + { + "epoch": 0.28, + "learning_rate": 0.00039914097378795124, + "loss": 0.6287, + "step": 143 + }, + { + "epoch": 0.28, + "learning_rate": 0.00039910058481223564, + "loss": 0.7016, + "step": 144 + }, + { + "epoch": 0.28, + "learning_rate": 0.00039905927017329726, + "loss": 0.6232, + "step": 145 + }, + { + "epoch": 0.28, + "learning_rate": 0.00039901703006321715, + "loss": 0.5291, + "step": 146 + }, + { + "epoch": 0.28, + "learning_rate": 0.00039897386467837903, + "loss": 0.5297, + "step": 147 + }, + { + "epoch": 0.28, + "learning_rate": 0.00039892977421946844, + "loss": 0.5784, + "step": 148 + }, + { + "epoch": 0.29, + "learning_rate": 0.0003988847588914718, + "loss": 0.5714, + "step": 149 + }, + { + "epoch": 0.29, + "learning_rate": 0.0003988388189036754, + "loss": 0.5044, + "step": 150 + }, + { + "epoch": 0.29, + "learning_rate": 0.0003987919544696646, + "loss": 0.8246, + "step": 151 + }, + { + "epoch": 0.29, + "learning_rate": 0.0003987441658073226, + "loss": 0.5048, + "step": 152 + }, + { + "epoch": 0.29, + "learning_rate": 0.0003986954531388297, + "loss": 0.5433, + "step": 153 + }, + { + "epoch": 0.3, + "learning_rate": 0.00039864581669066186, + "loss": 0.5251, + "step": 154 + }, + { + "epoch": 0.3, + "learning_rate": 0.0003985952566935902, + "loss": 0.5708, + "step": 155 + }, + { + "epoch": 0.3, + "learning_rate": 0.00039854377338267936, + "loss": 0.6276, + "step": 156 + }, + { + "epoch": 0.3, + "learning_rate": 0.00039849136699728684, + "loss": 0.4915, + "step": 157 + }, + { + "epoch": 0.3, + "learning_rate": 0.0003984380377810617, + "loss": 0.6389, + "step": 158 + }, + { + "epoch": 0.31, + "learning_rate": 0.00039838378598194325, + "loss": 0.6067, + "step": 159 + }, + { + "epoch": 0.31, + "learning_rate": 0.00039832861185216045, + "loss": 0.6136, + "step": 160 + }, + { + "epoch": 0.31, + "learning_rate": 0.0003982725156482301, + "loss": 0.5597, + "step": 161 + }, + { + "epoch": 0.31, + "learning_rate": 0.000398215497630956, + "loss": 0.5957, + "step": 162 + }, + { + "epoch": 0.31, + "learning_rate": 0.0003981575580654278, + "loss": 0.5853, + "step": 163 + }, + { + "epoch": 0.32, + "learning_rate": 0.0003980986972210194, + "loss": 0.5462, + "step": 164 + }, + { + "epoch": 0.32, + "learning_rate": 0.0003980389153713881, + "loss": 0.5302, + "step": 165 + }, + { + "epoch": 0.32, + "learning_rate": 0.00039797821279447307, + "loss": 0.5395, + "step": 166 + }, + { + "epoch": 0.32, + "learning_rate": 0.00039791658977249425, + "loss": 0.7004, + "step": 167 + }, + { + "epoch": 0.32, + "learning_rate": 0.00039785404659195084, + "loss": 0.5622, + "step": 168 + }, + { + "epoch": 0.33, + "learning_rate": 0.00039779058354362013, + "loss": 0.5759, + "step": 169 + }, + { + "epoch": 0.33, + "learning_rate": 0.000397726200922556, + "loss": 0.6184, + "step": 170 + }, + { + "epoch": 0.33, + "learning_rate": 0.0003976608990280877, + "loss": 0.5488, + "step": 171 + }, + { + "epoch": 0.33, + "learning_rate": 0.0003975946781638183, + "loss": 0.6162, + "step": 172 + }, + { + "epoch": 0.33, + "learning_rate": 0.0003975275386376236, + "loss": 0.558, + "step": 173 + }, + { + "epoch": 0.34, + "learning_rate": 0.0003974594807616502, + "loss": 0.519, + "step": 174 + }, + { + "epoch": 0.34, + "learning_rate": 0.0003973905048523144, + "loss": 0.6195, + "step": 175 + }, + { + "epoch": 0.34, + "learning_rate": 0.00039732061123030064, + "loss": 0.5991, + "step": 176 + }, + { + "epoch": 0.34, + "learning_rate": 0.0003972498002205601, + "loss": 0.5428, + "step": 177 + }, + { + "epoch": 0.34, + "learning_rate": 0.00039717807215230896, + "loss": 0.5323, + "step": 178 + }, + { + "epoch": 0.34, + "learning_rate": 0.00039710542735902705, + "loss": 0.5307, + "step": 179 + }, + { + "epoch": 0.35, + "learning_rate": 0.0003970318661784564, + "loss": 0.5783, + "step": 180 + }, + { + "epoch": 0.35, + "learning_rate": 0.0003969573889525993, + "loss": 0.5924, + "step": 181 + }, + { + "epoch": 0.35, + "learning_rate": 0.00039688199602771714, + "loss": 0.5902, + "step": 182 + }, + { + "epoch": 0.35, + "learning_rate": 0.00039680568775432855, + "loss": 0.6291, + "step": 183 + }, + { + "epoch": 0.35, + "learning_rate": 0.0003967284644872077, + "loss": 0.5942, + "step": 184 + }, + { + "epoch": 0.36, + "learning_rate": 0.0003966503265853829, + "loss": 0.4878, + "step": 185 + }, + { + "epoch": 0.36, + "learning_rate": 0.0003965712744121347, + "loss": 0.6487, + "step": 186 + }, + { + "epoch": 0.36, + "learning_rate": 0.0003964913083349945, + "loss": 0.6111, + "step": 187 + }, + { + "epoch": 0.36, + "learning_rate": 0.00039641042872574233, + "loss": 0.6072, + "step": 188 + }, + { + "epoch": 0.36, + "learning_rate": 0.00039632863596040575, + "loss": 0.716, + "step": 189 + }, + { + "epoch": 0.37, + "learning_rate": 0.00039624593041925763, + "loss": 0.6178, + "step": 190 + }, + { + "epoch": 0.37, + "learning_rate": 0.0003961623124868145, + "loss": 0.6323, + "step": 191 + }, + { + "epoch": 0.37, + "learning_rate": 0.00039607778255183485, + "loss": 0.5821, + "step": 192 + }, + { + "epoch": 0.37, + "learning_rate": 0.0003959923410073174, + "loss": 0.6738, + "step": 193 + }, + { + "epoch": 0.37, + "learning_rate": 0.0003959059882504989, + "loss": 0.6203, + "step": 194 + }, + { + "epoch": 0.38, + "learning_rate": 0.00039581872468285277, + "loss": 0.632, + "step": 195 + }, + { + "epoch": 0.38, + "learning_rate": 0.0003957305507100868, + "loss": 0.5857, + "step": 196 + }, + { + "epoch": 0.38, + "learning_rate": 0.00039564146674214164, + "loss": 0.6311, + "step": 197 + }, + { + "epoch": 0.38, + "learning_rate": 0.0003955514731931885, + "loss": 0.5889, + "step": 198 + }, + { + "epoch": 0.38, + "learning_rate": 0.00039546057048162763, + "loss": 0.5201, + "step": 199 + }, + { + "epoch": 0.39, + "learning_rate": 0.00039536875903008607, + "loss": 0.5581, + "step": 200 + }, + { + "epoch": 0.39, + "learning_rate": 0.00039527603926541586, + "loss": 0.5104, + "step": 201 + }, + { + "epoch": 0.39, + "learning_rate": 0.00039518241161869193, + "loss": 0.5978, + "step": 202 + }, + { + "epoch": 0.39, + "learning_rate": 0.00039508787652521013, + "loss": 0.6244, + "step": 203 + }, + { + "epoch": 0.39, + "learning_rate": 0.00039499243442448536, + "loss": 0.589, + "step": 204 + }, + { + "epoch": 0.39, + "learning_rate": 0.0003948960857602493, + "loss": 0.575, + "step": 205 + }, + { + "epoch": 0.4, + "learning_rate": 0.0003947988309804485, + "loss": 0.5494, + "step": 206 + }, + { + "epoch": 0.4, + "learning_rate": 0.0003947006705372422, + "loss": 0.4895, + "step": 207 + }, + { + "epoch": 0.4, + "learning_rate": 0.00039460160488700036, + "loss": 0.5479, + "step": 208 + }, + { + "epoch": 0.4, + "learning_rate": 0.00039450163449030124, + "loss": 0.5893, + "step": 209 + }, + { + "epoch": 0.4, + "learning_rate": 0.0003944007598119297, + "loss": 0.5451, + "step": 210 + }, + { + "epoch": 0.41, + "learning_rate": 0.0003942989813208747, + "loss": 0.5582, + "step": 211 + }, + { + "epoch": 0.41, + "learning_rate": 0.0003941962994903273, + "loss": 0.5121, + "step": 212 + }, + { + "epoch": 0.41, + "learning_rate": 0.00039409271479767826, + "loss": 0.6324, + "step": 213 + }, + { + "epoch": 0.41, + "learning_rate": 0.000393988227724516, + "loss": 0.6118, + "step": 214 + }, + { + "epoch": 0.41, + "learning_rate": 0.0003938828387566244, + "loss": 0.6303, + "step": 215 + }, + { + "epoch": 0.42, + "learning_rate": 0.0003937765483839804, + "loss": 0.7705, + "step": 216 + }, + { + "epoch": 0.42, + "learning_rate": 0.0003936693571007517, + "loss": 0.6224, + "step": 217 + }, + { + "epoch": 0.42, + "learning_rate": 0.0003935612654052946, + "loss": 0.5664, + "step": 218 + }, + { + "epoch": 0.42, + "learning_rate": 0.00039345227380015163, + "loss": 0.66, + "step": 219 + }, + { + "epoch": 0.42, + "learning_rate": 0.00039334238279204906, + "loss": 0.5582, + "step": 220 + }, + { + "epoch": 0.43, + "learning_rate": 0.00039323159289189505, + "loss": 0.6087, + "step": 221 + }, + { + "epoch": 0.43, + "learning_rate": 0.0003931199046147764, + "loss": 0.5566, + "step": 222 + }, + { + "epoch": 0.43, + "learning_rate": 0.00039300731847995716, + "loss": 0.5775, + "step": 223 + }, + { + "epoch": 0.43, + "learning_rate": 0.00039289383501087534, + "loss": 0.5081, + "step": 224 + }, + { + "epoch": 0.43, + "learning_rate": 0.00039277945473514104, + "loss": 0.5218, + "step": 225 + }, + { + "epoch": 0.44, + "learning_rate": 0.0003926641781845338, + "loss": 0.6655, + "step": 226 + }, + { + "epoch": 0.44, + "learning_rate": 0.0003925480058950002, + "loss": 0.5735, + "step": 227 + }, + { + "epoch": 0.44, + "learning_rate": 0.00039243093840665114, + "loss": 0.6609, + "step": 228 + }, + { + "epoch": 0.44, + "learning_rate": 0.0003923129762637596, + "loss": 0.7323, + "step": 229 + }, + { + "epoch": 0.44, + "learning_rate": 0.000392194120014758, + "loss": 0.5703, + "step": 230 + }, + { + "epoch": 0.44, + "learning_rate": 0.00039207437021223583, + "loss": 0.6545, + "step": 231 + }, + { + "epoch": 0.45, + "learning_rate": 0.0003919537274129366, + "loss": 0.521, + "step": 232 + }, + { + "epoch": 0.45, + "learning_rate": 0.00039183219217775564, + "loss": 0.5257, + "step": 233 + }, + { + "epoch": 0.45, + "learning_rate": 0.0003917097650717377, + "loss": 0.5487, + "step": 234 + }, + { + "epoch": 0.45, + "learning_rate": 0.00039158644666407365, + "loss": 0.4861, + "step": 235 + }, + { + "epoch": 0.45, + "learning_rate": 0.00039146223752809845, + "loss": 0.4928, + "step": 236 + }, + { + "epoch": 0.46, + "learning_rate": 0.0003913371382412883, + "loss": 0.5253, + "step": 237 + }, + { + "epoch": 0.46, + "learning_rate": 0.00039121114938525756, + "loss": 0.6155, + "step": 238 + }, + { + "epoch": 0.46, + "learning_rate": 0.00039108427154575684, + "loss": 0.55, + "step": 239 + }, + { + "epoch": 0.46, + "learning_rate": 0.00039095650531266967, + "loss": 0.6617, + "step": 240 + }, + { + "epoch": 0.46, + "learning_rate": 0.00039082785128000976, + "loss": 0.5198, + "step": 241 + }, + { + "epoch": 0.47, + "learning_rate": 0.00039069831004591866, + "loss": 0.5302, + "step": 242 + }, + { + "epoch": 0.47, + "learning_rate": 0.0003905678822126625, + "loss": 0.5347, + "step": 243 + }, + { + "epoch": 0.47, + "learning_rate": 0.00039043656838662946, + "loss": 0.531, + "step": 244 + }, + { + "epoch": 0.47, + "learning_rate": 0.00039030436917832697, + "loss": 0.4884, + "step": 245 + }, + { + "epoch": 0.47, + "learning_rate": 0.00039017128520237883, + "loss": 0.6027, + "step": 246 + }, + { + "epoch": 0.48, + "learning_rate": 0.0003900373170775222, + "loss": 0.5537, + "step": 247 + }, + { + "epoch": 0.48, + "learning_rate": 0.00038990246542660494, + "loss": 0.5753, + "step": 248 + }, + { + "epoch": 0.48, + "learning_rate": 0.00038976673087658256, + "loss": 0.5059, + "step": 249 + }, + { + "epoch": 0.48, + "learning_rate": 0.00038963011405851537, + "loss": 0.5118, + "step": 250 + }, + { + "epoch": 0.48, + "learning_rate": 0.00038949261560756565, + "loss": 0.5645, + "step": 251 + }, + { + "epoch": 0.49, + "learning_rate": 0.0003893542361629944, + "loss": 0.5623, + "step": 252 + }, + { + "epoch": 0.49, + "learning_rate": 0.00038921497636815866, + "loss": 0.5216, + "step": 253 + }, + { + "epoch": 0.49, + "learning_rate": 0.0003890748368705085, + "loss": 0.4501, + "step": 254 + }, + { + "epoch": 0.49, + "learning_rate": 0.0003889338183215838, + "loss": 0.48, + "step": 255 + }, + { + "epoch": 0.49, + "learning_rate": 0.00038879192137701135, + "loss": 0.5218, + "step": 256 + }, + { + "epoch": 0.49, + "learning_rate": 0.0003886491466965018, + "loss": 0.5858, + "step": 257 + }, + { + "epoch": 0.5, + "learning_rate": 0.00038850549494384685, + "loss": 0.6124, + "step": 258 + }, + { + "epoch": 0.5, + "learning_rate": 0.00038836096678691536, + "loss": 0.4645, + "step": 259 + }, + { + "epoch": 0.5, + "learning_rate": 0.00038821556289765136, + "loss": 0.474, + "step": 260 + }, + { + "epoch": 0.5, + "learning_rate": 0.00038806928395207003, + "loss": 0.4364, + "step": 261 + }, + { + "epoch": 0.5, + "learning_rate": 0.00038792213063025484, + "loss": 0.5821, + "step": 262 + }, + { + "epoch": 0.51, + "learning_rate": 0.0003877741036163547, + "loss": 0.5393, + "step": 263 + }, + { + "epoch": 0.51, + "learning_rate": 0.0003876252035985804, + "loss": 0.5373, + "step": 264 + }, + { + "epoch": 0.51, + "learning_rate": 0.0003874754312692013, + "loss": 0.6021, + "step": 265 + }, + { + "epoch": 0.51, + "learning_rate": 0.0003873247873245426, + "loss": 0.4549, + "step": 266 + }, + { + "epoch": 0.51, + "learning_rate": 0.0003871732724649817, + "loss": 0.5994, + "step": 267 + }, + { + "epoch": 0.52, + "learning_rate": 0.0003870208873949453, + "loss": 0.4764, + "step": 268 + }, + { + "epoch": 0.52, + "learning_rate": 0.00038686763282290556, + "loss": 0.4311, + "step": 269 + }, + { + "epoch": 0.52, + "learning_rate": 0.0003867135094613774, + "loss": 0.5462, + "step": 270 + }, + { + "epoch": 0.52, + "learning_rate": 0.0003865585180269148, + "loss": 0.5006, + "step": 271 + }, + { + "epoch": 0.52, + "learning_rate": 0.0003864026592401076, + "loss": 0.5347, + "step": 272 + }, + { + "epoch": 0.53, + "learning_rate": 0.00038624593382557835, + "loss": 0.5242, + "step": 273 + }, + { + "epoch": 0.53, + "learning_rate": 0.00038608834251197856, + "loss": 0.5005, + "step": 274 + }, + { + "epoch": 0.53, + "learning_rate": 0.00038592988603198554, + "loss": 0.5436, + "step": 275 + }, + { + "epoch": 0.53, + "learning_rate": 0.000385770565122299, + "loss": 0.4658, + "step": 276 + }, + { + "epoch": 0.53, + "learning_rate": 0.0003856103805236375, + "loss": 0.5273, + "step": 277 + }, + { + "epoch": 0.54, + "learning_rate": 0.00038544933298073516, + "loss": 0.436, + "step": 278 + }, + { + "epoch": 0.54, + "learning_rate": 0.00038528742324233804, + "loss": 0.4785, + "step": 279 + }, + { + "epoch": 0.54, + "learning_rate": 0.00038512465206120086, + "loss": 0.5366, + "step": 280 + }, + { + "epoch": 0.54, + "learning_rate": 0.00038496102019408324, + "loss": 0.4448, + "step": 281 + }, + { + "epoch": 0.54, + "learning_rate": 0.00038479652840174637, + "loss": 0.5132, + "step": 282 + }, + { + "epoch": 0.54, + "learning_rate": 0.00038463117744894955, + "loss": 0.7918, + "step": 283 + }, + { + "epoch": 0.55, + "learning_rate": 0.00038446496810444627, + "loss": 0.5309, + "step": 284 + }, + { + "epoch": 0.55, + "learning_rate": 0.00038429790114098114, + "loss": 0.5316, + "step": 285 + }, + { + "epoch": 0.55, + "learning_rate": 0.00038412997733528576, + "loss": 0.4611, + "step": 286 + }, + { + "epoch": 0.55, + "learning_rate": 0.00038396119746807563, + "loss": 0.4609, + "step": 287 + }, + { + "epoch": 0.55, + "learning_rate": 0.00038379156232404613, + "loss": 0.5821, + "step": 288 + }, + { + "epoch": 0.56, + "learning_rate": 0.0003836210726918691, + "loss": 0.5883, + "step": 289 + }, + { + "epoch": 0.56, + "learning_rate": 0.0003834497293641889, + "loss": 0.5012, + "step": 290 + }, + { + "epoch": 0.56, + "learning_rate": 0.00038327753313761913, + "loss": 0.4457, + "step": 291 + }, + { + "epoch": 0.56, + "learning_rate": 0.00038310448481273867, + "loss": 0.4851, + "step": 292 + }, + { + "epoch": 0.56, + "learning_rate": 0.00038293058519408787, + "loss": 0.5622, + "step": 293 + }, + { + "epoch": 0.57, + "learning_rate": 0.00038275583509016507, + "loss": 0.5703, + "step": 294 + }, + { + "epoch": 0.57, + "learning_rate": 0.00038258023531342265, + "loss": 0.5718, + "step": 295 + }, + { + "epoch": 0.57, + "learning_rate": 0.0003824037866802632, + "loss": 0.5183, + "step": 296 + }, + { + "epoch": 0.57, + "learning_rate": 0.00038222649001103614, + "loss": 0.5085, + "step": 297 + }, + { + "epoch": 0.57, + "learning_rate": 0.00038204834613003323, + "loss": 0.5388, + "step": 298 + }, + { + "epoch": 0.58, + "learning_rate": 0.00038186935586548537, + "loss": 0.5425, + "step": 299 + }, + { + "epoch": 0.58, + "learning_rate": 0.0003816895200495584, + "loss": 0.447, + "step": 300 + }, + { + "epoch": 0.58, + "learning_rate": 0.0003815088395183493, + "loss": 0.5541, + "step": 301 + }, + { + "epoch": 0.58, + "learning_rate": 0.00038132731511188227, + "loss": 0.5518, + "step": 302 + }, + { + "epoch": 0.58, + "learning_rate": 0.000381144947674105, + "loss": 0.5074, + "step": 303 + }, + { + "epoch": 0.59, + "learning_rate": 0.0003809617380528847, + "loss": 0.5134, + "step": 304 + }, + { + "epoch": 0.59, + "learning_rate": 0.0003807776871000037, + "loss": 0.4599, + "step": 305 + }, + { + "epoch": 0.59, + "learning_rate": 0.0003805927956711562, + "loss": 0.5838, + "step": 306 + }, + { + "epoch": 0.59, + "learning_rate": 0.00038040706462594395, + "loss": 0.5216, + "step": 307 + }, + { + "epoch": 0.59, + "learning_rate": 0.00038022049482787216, + "loss": 0.5323, + "step": 308 + }, + { + "epoch": 0.6, + "learning_rate": 0.0003800330871443456, + "loss": 0.5681, + "step": 309 + }, + { + "epoch": 0.6, + "learning_rate": 0.00037984484244666446, + "loss": 0.4172, + "step": 310 + }, + { + "epoch": 0.6, + "learning_rate": 0.0003796557616100207, + "loss": 0.4958, + "step": 311 + }, + { + "epoch": 0.6, + "learning_rate": 0.0003794658455134934, + "loss": 0.662, + "step": 312 + }, + { + "epoch": 0.6, + "learning_rate": 0.0003792750950400451, + "loss": 0.5832, + "step": 313 + }, + { + "epoch": 0.6, + "learning_rate": 0.0003790835110765174, + "loss": 0.4271, + "step": 314 + }, + { + "epoch": 0.61, + "learning_rate": 0.0003788910945136271, + "loss": 0.4842, + "step": 315 + }, + { + "epoch": 0.61, + "learning_rate": 0.00037869784624596186, + "loss": 0.4656, + "step": 316 + }, + { + "epoch": 0.61, + "learning_rate": 0.00037850376717197626, + "loss": 0.4981, + "step": 317 + }, + { + "epoch": 0.61, + "learning_rate": 0.00037830885819398733, + "loss": 0.5162, + "step": 318 + }, + { + "epoch": 0.61, + "learning_rate": 0.00037811312021817067, + "loss": 0.652, + "step": 319 + }, + { + "epoch": 0.62, + "learning_rate": 0.0003779165541545558, + "loss": 0.5104, + "step": 320 + }, + { + "epoch": 0.62, + "learning_rate": 0.0003777191609170225, + "loss": 0.4971, + "step": 321 + }, + { + "epoch": 0.62, + "learning_rate": 0.0003775209414232962, + "loss": 0.4871, + "step": 322 + }, + { + "epoch": 0.62, + "learning_rate": 0.0003773218965949436, + "loss": 0.5226, + "step": 323 + }, + { + "epoch": 0.62, + "learning_rate": 0.00037712202735736884, + "loss": 0.4823, + "step": 324 + }, + { + "epoch": 0.63, + "learning_rate": 0.0003769213346398087, + "loss": 0.497, + "step": 325 + }, + { + "epoch": 0.63, + "learning_rate": 0.0003767198193753286, + "loss": 0.5976, + "step": 326 + }, + { + "epoch": 0.63, + "learning_rate": 0.0003765174825008181, + "loss": 0.4532, + "step": 327 + }, + { + "epoch": 0.63, + "learning_rate": 0.0003763143249569868, + "loss": 0.5236, + "step": 328 + }, + { + "epoch": 0.63, + "learning_rate": 0.00037611034768835947, + "loss": 0.6513, + "step": 329 + }, + { + "epoch": 0.64, + "learning_rate": 0.00037590555164327224, + "loss": 0.5686, + "step": 330 + }, + { + "epoch": 0.64, + "learning_rate": 0.00037569993777386774, + "loss": 0.456, + "step": 331 + }, + { + "epoch": 0.64, + "learning_rate": 0.0003754935070360909, + "loss": 0.5181, + "step": 332 + }, + { + "epoch": 0.64, + "learning_rate": 0.0003752862603896846, + "loss": 0.4765, + "step": 333 + }, + { + "epoch": 0.64, + "learning_rate": 0.00037507819879818477, + "loss": 0.5363, + "step": 334 + }, + { + "epoch": 0.65, + "learning_rate": 0.00037486932322891646, + "loss": 0.4584, + "step": 335 + }, + { + "epoch": 0.65, + "learning_rate": 0.00037465963465298886, + "loss": 0.5428, + "step": 336 + }, + { + "epoch": 0.65, + "learning_rate": 0.0003744491340452913, + "loss": 0.3927, + "step": 337 + }, + { + "epoch": 0.65, + "learning_rate": 0.0003742378223844882, + "loss": 0.5478, + "step": 338 + }, + { + "epoch": 0.65, + "learning_rate": 0.0003740257006530147, + "loss": 0.469, + "step": 339 + }, + { + "epoch": 0.65, + "learning_rate": 0.00037381276983707246, + "loss": 0.5169, + "step": 340 + }, + { + "epoch": 0.66, + "learning_rate": 0.00037359903092662434, + "loss": 0.4797, + "step": 341 + }, + { + "epoch": 0.66, + "learning_rate": 0.00037338448491539054, + "loss": 0.5315, + "step": 342 + }, + { + "epoch": 0.66, + "learning_rate": 0.00037316913280084353, + "loss": 0.4422, + "step": 343 + }, + { + "epoch": 0.66, + "learning_rate": 0.0003729529755842035, + "loss": 0.4426, + "step": 344 + }, + { + "epoch": 0.66, + "learning_rate": 0.0003727360142704337, + "loss": 0.4718, + "step": 345 + }, + { + "epoch": 0.67, + "learning_rate": 0.0003725182498682361, + "loss": 0.5585, + "step": 346 + }, + { + "epoch": 0.67, + "learning_rate": 0.0003722996833900459, + "loss": 0.4775, + "step": 347 + }, + { + "epoch": 0.67, + "learning_rate": 0.0003720803158520279, + "loss": 0.6014, + "step": 348 + }, + { + "epoch": 0.67, + "learning_rate": 0.00037186014827407076, + "loss": 0.5117, + "step": 349 + }, + { + "epoch": 0.67, + "learning_rate": 0.0003716391816797829, + "loss": 0.5404, + "step": 350 + }, + { + "epoch": 0.68, + "learning_rate": 0.0003714174170964876, + "loss": 0.527, + "step": 351 + }, + { + "epoch": 0.68, + "learning_rate": 0.00037119485555521796, + "loss": 0.4555, + "step": 352 + }, + { + "epoch": 0.68, + "learning_rate": 0.00037097149809071255, + "loss": 0.5372, + "step": 353 + }, + { + "epoch": 0.68, + "learning_rate": 0.00037074734574141016, + "loss": 0.5377, + "step": 354 + }, + { + "epoch": 0.68, + "learning_rate": 0.0003705223995494454, + "loss": 0.4925, + "step": 355 + }, + { + "epoch": 0.69, + "learning_rate": 0.00037029666056064345, + "loss": 0.482, + "step": 356 + }, + { + "epoch": 0.69, + "learning_rate": 0.00037007012982451546, + "loss": 0.5235, + "step": 357 + }, + { + "epoch": 0.69, + "learning_rate": 0.00036984280839425356, + "loss": 0.4957, + "step": 358 + }, + { + "epoch": 0.69, + "learning_rate": 0.000369614697326726, + "loss": 0.5379, + "step": 359 + }, + { + "epoch": 0.69, + "learning_rate": 0.0003693857976824721, + "loss": 0.4653, + "step": 360 + }, + { + "epoch": 0.7, + "learning_rate": 0.00036915611052569785, + "loss": 0.469, + "step": 361 + }, + { + "epoch": 0.7, + "learning_rate": 0.0003689256369242702, + "loss": 0.5618, + "step": 362 + }, + { + "epoch": 0.7, + "learning_rate": 0.0003686943779497124, + "loss": 0.4459, + "step": 363 + }, + { + "epoch": 0.7, + "learning_rate": 0.0003684623346771995, + "loss": 0.5606, + "step": 364 + }, + { + "epoch": 0.7, + "learning_rate": 0.0003682295081855524, + "loss": 0.4368, + "step": 365 + }, + { + "epoch": 0.7, + "learning_rate": 0.00036799589955723375, + "loss": 0.4168, + "step": 366 + }, + { + "epoch": 0.71, + "learning_rate": 0.00036776150987834243, + "loss": 0.4664, + "step": 367 + }, + { + "epoch": 0.71, + "learning_rate": 0.00036752634023860846, + "loss": 0.4737, + "step": 368 + }, + { + "epoch": 0.71, + "learning_rate": 0.0003672903917313883, + "loss": 0.4247, + "step": 369 + }, + { + "epoch": 0.71, + "learning_rate": 0.00036705366545365935, + "loss": 0.5677, + "step": 370 + }, + { + "epoch": 0.71, + "learning_rate": 0.00036681616250601505, + "loss": 0.5441, + "step": 371 + }, + { + "epoch": 0.72, + "learning_rate": 0.0003665778839926599, + "loss": 0.6247, + "step": 372 + }, + { + "epoch": 0.72, + "learning_rate": 0.00036633883102140405, + "loss": 0.5217, + "step": 373 + }, + { + "epoch": 0.72, + "learning_rate": 0.0003660990047036584, + "loss": 0.4651, + "step": 374 + }, + { + "epoch": 0.72, + "learning_rate": 0.0003658584061544291, + "loss": 0.4648, + "step": 375 + }, + { + "epoch": 0.72, + "learning_rate": 0.0003656170364923128, + "loss": 0.6048, + "step": 376 + }, + { + "epoch": 0.73, + "learning_rate": 0.00036537489683949114, + "loss": 0.4515, + "step": 377 + }, + { + "epoch": 0.73, + "learning_rate": 0.0003651319883217255, + "loss": 0.5096, + "step": 378 + }, + { + "epoch": 0.73, + "learning_rate": 0.00036488831206835207, + "loss": 0.4231, + "step": 379 + }, + { + "epoch": 0.73, + "learning_rate": 0.00036464386921227637, + "loss": 0.4903, + "step": 380 + }, + { + "epoch": 0.73, + "learning_rate": 0.00036439866088996796, + "loss": 0.5131, + "step": 381 + }, + { + "epoch": 0.74, + "learning_rate": 0.0003641526882414553, + "loss": 0.5986, + "step": 382 + }, + { + "epoch": 0.74, + "learning_rate": 0.0003639059524103203, + "loss": 0.6, + "step": 383 + }, + { + "epoch": 0.74, + "learning_rate": 0.0003636584545436931, + "loss": 0.5216, + "step": 384 + }, + { + "epoch": 0.74, + "learning_rate": 0.0003634101957922468, + "loss": 0.5144, + "step": 385 + }, + { + "epoch": 0.74, + "learning_rate": 0.00036316117731019184, + "loss": 0.4963, + "step": 386 + }, + { + "epoch": 0.75, + "learning_rate": 0.0003629114002552711, + "loss": 0.5657, + "step": 387 + }, + { + "epoch": 0.75, + "learning_rate": 0.00036266086578875384, + "loss": 0.5028, + "step": 388 + }, + { + "epoch": 0.75, + "learning_rate": 0.0003624095750754311, + "loss": 0.573, + "step": 389 + }, + { + "epoch": 0.75, + "learning_rate": 0.00036215752928360967, + "loss": 0.5199, + "step": 390 + }, + { + "epoch": 0.75, + "learning_rate": 0.0003619047295851068, + "loss": 0.656, + "step": 391 + }, + { + "epoch": 0.75, + "learning_rate": 0.00036165117715524506, + "loss": 0.5129, + "step": 392 + }, + { + "epoch": 0.76, + "learning_rate": 0.00036139687317284647, + "loss": 0.3945, + "step": 393 + }, + { + "epoch": 0.76, + "learning_rate": 0.0003611418188202271, + "loss": 0.5318, + "step": 394 + }, + { + "epoch": 0.76, + "learning_rate": 0.00036088601528319196, + "loss": 0.5344, + "step": 395 + }, + { + "epoch": 0.76, + "learning_rate": 0.00036062946375102885, + "loss": 0.5407, + "step": 396 + }, + { + "epoch": 0.76, + "learning_rate": 0.0003603721654165034, + "loss": 0.5364, + "step": 397 + }, + { + "epoch": 0.77, + "learning_rate": 0.00036011412147585306, + "loss": 0.5407, + "step": 398 + }, + { + "epoch": 0.77, + "learning_rate": 0.0003598553331287821, + "loss": 0.5999, + "step": 399 + }, + { + "epoch": 0.77, + "learning_rate": 0.0003595958015784555, + "loss": 0.624, + "step": 400 + }, + { + "epoch": 0.77, + "learning_rate": 0.00035933552803149354, + "loss": 0.5351, + "step": 401 + }, + { + "epoch": 0.77, + "learning_rate": 0.0003590745136979662, + "loss": 0.5196, + "step": 402 + }, + { + "epoch": 0.78, + "learning_rate": 0.00035881275979138765, + "loss": 0.5447, + "step": 403 + }, + { + "epoch": 0.78, + "learning_rate": 0.0003585502675287104, + "loss": 0.4908, + "step": 404 + }, + { + "epoch": 0.78, + "learning_rate": 0.00035828703813031986, + "loss": 0.5172, + "step": 405 + }, + { + "epoch": 0.78, + "learning_rate": 0.00035802307282002834, + "loss": 0.5923, + "step": 406 + }, + { + "epoch": 0.78, + "learning_rate": 0.0003577583728250699, + "loss": 0.568, + "step": 407 + }, + { + "epoch": 0.79, + "learning_rate": 0.00035749293937609395, + "loss": 0.4618, + "step": 408 + }, + { + "epoch": 0.79, + "learning_rate": 0.0003572267737071601, + "loss": 0.5351, + "step": 409 + }, + { + "epoch": 0.79, + "learning_rate": 0.0003569598770557322, + "loss": 0.5285, + "step": 410 + }, + { + "epoch": 0.79, + "learning_rate": 0.00035669225066267256, + "loss": 0.4571, + "step": 411 + }, + { + "epoch": 0.79, + "learning_rate": 0.00035642389577223625, + "loss": 0.4214, + "step": 412 + }, + { + "epoch": 0.8, + "learning_rate": 0.0003561548136320653, + "loss": 0.5393, + "step": 413 + }, + { + "epoch": 0.8, + "learning_rate": 0.0003558850054931828, + "loss": 0.549, + "step": 414 + }, + { + "epoch": 0.8, + "learning_rate": 0.00035561447260998714, + "loss": 0.4824, + "step": 415 + }, + { + "epoch": 0.8, + "learning_rate": 0.00035534321624024656, + "loss": 0.6244, + "step": 416 + }, + { + "epoch": 0.8, + "learning_rate": 0.00035507123764509245, + "loss": 0.5436, + "step": 417 + }, + { + "epoch": 0.8, + "learning_rate": 0.0003547985380890144, + "loss": 0.5198, + "step": 418 + }, + { + "epoch": 0.81, + "learning_rate": 0.00035452511883985366, + "loss": 0.5979, + "step": 419 + }, + { + "epoch": 0.81, + "learning_rate": 0.00035425098116879754, + "loss": 0.4158, + "step": 420 + }, + { + "epoch": 0.81, + "learning_rate": 0.00035397612635037356, + "loss": 0.5125, + "step": 421 + }, + { + "epoch": 0.81, + "learning_rate": 0.00035370055566244334, + "loss": 0.4699, + "step": 422 + }, + { + "epoch": 0.81, + "learning_rate": 0.0003534242703861966, + "loss": 0.5553, + "step": 423 + }, + { + "epoch": 0.82, + "learning_rate": 0.00035314727180614573, + "loss": 0.5969, + "step": 424 + }, + { + "epoch": 0.82, + "learning_rate": 0.00035286956121011897, + "loss": 0.456, + "step": 425 + }, + { + "epoch": 0.82, + "learning_rate": 0.0003525911398892552, + "loss": 0.5195, + "step": 426 + }, + { + "epoch": 0.82, + "learning_rate": 0.0003523120091379975, + "loss": 0.5187, + "step": 427 + }, + { + "epoch": 0.82, + "learning_rate": 0.00035203217025408726, + "loss": 0.5443, + "step": 428 + }, + { + "epoch": 0.83, + "learning_rate": 0.0003517516245385582, + "loss": 0.4476, + "step": 429 + }, + { + "epoch": 0.83, + "learning_rate": 0.0003514703732957301, + "loss": 0.5757, + "step": 430 + }, + { + "epoch": 0.83, + "learning_rate": 0.00035118841783320304, + "loss": 0.5129, + "step": 431 + }, + { + "epoch": 0.83, + "learning_rate": 0.00035090575946185114, + "loss": 0.6354, + "step": 432 + }, + { + "epoch": 0.83, + "learning_rate": 0.00035062239949581645, + "loss": 0.4065, + "step": 433 + }, + { + "epoch": 0.84, + "learning_rate": 0.000350338339252503, + "loss": 0.5472, + "step": 434 + }, + { + "epoch": 0.84, + "learning_rate": 0.00035005358005257045, + "loss": 0.5424, + "step": 435 + }, + { + "epoch": 0.84, + "learning_rate": 0.00034976812321992816, + "loss": 0.6127, + "step": 436 + }, + { + "epoch": 0.84, + "learning_rate": 0.00034948197008172877, + "loss": 0.63, + "step": 437 + }, + { + "epoch": 0.84, + "learning_rate": 0.0003491951219683625, + "loss": 0.413, + "step": 438 + }, + { + "epoch": 0.85, + "learning_rate": 0.00034890758021345034, + "loss": 0.5435, + "step": 439 + }, + { + "epoch": 0.85, + "learning_rate": 0.00034861934615383844, + "loss": 0.5433, + "step": 440 + }, + { + "epoch": 0.85, + "learning_rate": 0.00034833042112959153, + "loss": 0.4763, + "step": 441 + }, + { + "epoch": 0.85, + "learning_rate": 0.00034804080648398667, + "loss": 0.5727, + "step": 442 + }, + { + "epoch": 0.85, + "learning_rate": 0.00034775050356350727, + "loss": 0.5392, + "step": 443 + }, + { + "epoch": 0.85, + "learning_rate": 0.00034745951371783666, + "loss": 0.4981, + "step": 444 + }, + { + "epoch": 0.86, + "learning_rate": 0.0003471678382998518, + "loss": 0.5516, + "step": 445 + }, + { + "epoch": 0.86, + "learning_rate": 0.00034687547866561703, + "loss": 0.4965, + "step": 446 + }, + { + "epoch": 0.86, + "learning_rate": 0.0003465824361743779, + "loss": 0.4982, + "step": 447 + }, + { + "epoch": 0.86, + "learning_rate": 0.0003462887121885544, + "loss": 0.5619, + "step": 448 + }, + { + "epoch": 0.86, + "learning_rate": 0.0003459943080737353, + "loss": 0.5273, + "step": 449 + }, + { + "epoch": 0.87, + "learning_rate": 0.00034569922519867133, + "loss": 0.517, + "step": 450 + }, + { + "epoch": 0.87, + "learning_rate": 0.00034540346493526876, + "loss": 0.4874, + "step": 451 + }, + { + "epoch": 0.87, + "learning_rate": 0.0003451070286585833, + "loss": 0.5966, + "step": 452 + }, + { + "epoch": 0.87, + "learning_rate": 0.0003448099177468137, + "loss": 0.4487, + "step": 453 + }, + { + "epoch": 0.87, + "learning_rate": 0.0003445121335812951, + "loss": 0.5091, + "step": 454 + }, + { + "epoch": 0.88, + "learning_rate": 0.0003442136775464929, + "loss": 0.407, + "step": 455 + }, + { + "epoch": 0.88, + "learning_rate": 0.0003439145510299958, + "loss": 0.6327, + "step": 456 + }, + { + "epoch": 0.88, + "learning_rate": 0.00034361475542251025, + "loss": 0.4217, + "step": 457 + }, + { + "epoch": 0.88, + "learning_rate": 0.0003433142921178531, + "loss": 0.6102, + "step": 458 + }, + { + "epoch": 0.88, + "learning_rate": 0.0003430131625129456, + "loss": 0.5556, + "step": 459 + }, + { + "epoch": 0.89, + "learning_rate": 0.00034271136800780673, + "loss": 0.4986, + "step": 460 + }, + { + "epoch": 0.89, + "learning_rate": 0.0003424089100055467, + "loss": 0.5406, + "step": 461 + }, + { + "epoch": 0.89, + "learning_rate": 0.00034210578991236056, + "loss": 0.5881, + "step": 462 + }, + { + "epoch": 0.89, + "learning_rate": 0.00034180200913752157, + "loss": 0.4869, + "step": 463 + }, + { + "epoch": 0.89, + "learning_rate": 0.00034149756909337454, + "loss": 0.5626, + "step": 464 + }, + { + "epoch": 0.9, + "learning_rate": 0.0003411924711953295, + "loss": 0.564, + "step": 465 + }, + { + "epoch": 0.9, + "learning_rate": 0.00034088671686185486, + "loss": 0.6272, + "step": 466 + }, + { + "epoch": 0.9, + "learning_rate": 0.0003405803075144711, + "loss": 0.4643, + "step": 467 + }, + { + "epoch": 0.9, + "learning_rate": 0.0003402732445777438, + "loss": 0.5435, + "step": 468 + }, + { + "epoch": 0.9, + "learning_rate": 0.00033996552947927744, + "loss": 0.5844, + "step": 469 + }, + { + "epoch": 0.91, + "learning_rate": 0.0003396571636497084, + "loss": 0.5362, + "step": 470 + }, + { + "epoch": 0.91, + "learning_rate": 0.00033934814852269865, + "loss": 0.5607, + "step": 471 + }, + { + "epoch": 0.91, + "learning_rate": 0.0003390384855349285, + "loss": 0.4836, + "step": 472 + }, + { + "epoch": 0.91, + "learning_rate": 0.00033872817612609065, + "loss": 0.6555, + "step": 473 + }, + { + "epoch": 0.91, + "learning_rate": 0.00033841722173888315, + "loss": 0.4784, + "step": 474 + }, + { + "epoch": 0.91, + "learning_rate": 0.00033810562381900253, + "loss": 0.5583, + "step": 475 + }, + { + "epoch": 0.92, + "learning_rate": 0.00033779338381513736, + "loss": 0.4679, + "step": 476 + }, + { + "epoch": 0.92, + "learning_rate": 0.0003374805031789613, + "loss": 0.5325, + "step": 477 + }, + { + "epoch": 0.92, + "learning_rate": 0.00033716698336512654, + "loss": 0.6601, + "step": 478 + }, + { + "epoch": 0.92, + "learning_rate": 0.000336852825831257, + "loss": 0.4838, + "step": 479 + }, + { + "epoch": 0.92, + "learning_rate": 0.0003365380320379414, + "loss": 0.5588, + "step": 480 + }, + { + "epoch": 0.93, + "learning_rate": 0.00033622260344872665, + "loss": 0.4596, + "step": 481 + }, + { + "epoch": 0.93, + "learning_rate": 0.0003359065415301108, + "loss": 0.5228, + "step": 482 + }, + { + "epoch": 0.93, + "learning_rate": 0.00033558984775153663, + "loss": 0.5316, + "step": 483 + }, + { + "epoch": 0.93, + "learning_rate": 0.00033527252358538437, + "loss": 0.4761, + "step": 484 + }, + { + "epoch": 0.93, + "learning_rate": 0.0003349545705069653, + "loss": 0.5254, + "step": 485 + }, + { + "epoch": 0.94, + "learning_rate": 0.0003346359899945144, + "loss": 0.4786, + "step": 486 + }, + { + "epoch": 0.94, + "learning_rate": 0.00033431678352918384, + "loss": 0.4302, + "step": 487 + }, + { + "epoch": 0.94, + "learning_rate": 0.0003339969525950361, + "loss": 0.4914, + "step": 488 + }, + { + "epoch": 0.94, + "learning_rate": 0.00033367649867903663, + "loss": 0.4102, + "step": 489 + }, + { + "epoch": 0.94, + "learning_rate": 0.0003333554232710477, + "loss": 0.4698, + "step": 490 + }, + { + "epoch": 0.95, + "learning_rate": 0.0003330337278638207, + "loss": 0.4454, + "step": 491 + }, + { + "epoch": 0.95, + "learning_rate": 0.00033271141395298964, + "loss": 0.4648, + "step": 492 + }, + { + "epoch": 0.95, + "learning_rate": 0.00033238848303706415, + "loss": 0.4616, + "step": 493 + }, + { + "epoch": 0.95, + "learning_rate": 0.00033206493661742237, + "loss": 0.4861, + "step": 494 + }, + { + "epoch": 0.95, + "learning_rate": 0.00033174077619830416, + "loss": 0.4797, + "step": 495 + }, + { + "epoch": 0.96, + "learning_rate": 0.00033141600328680373, + "loss": 0.5104, + "step": 496 + }, + { + "epoch": 0.96, + "learning_rate": 0.00033109061939286336, + "loss": 0.5712, + "step": 497 + }, + { + "epoch": 0.96, + "learning_rate": 0.00033076462602926553, + "loss": 0.5425, + "step": 498 + }, + { + "epoch": 0.96, + "learning_rate": 0.00033043802471162636, + "loss": 0.6156, + "step": 499 + }, + { + "epoch": 0.96, + "learning_rate": 0.0003301108169583887, + "loss": 0.4282, + "step": 500 + }, + { + "epoch": 0.96, + "learning_rate": 0.0003297830042908146, + "loss": 0.4088, + "step": 501 + }, + { + "epoch": 0.97, + "learning_rate": 0.00032945458823297857, + "loss": 0.4866, + "step": 502 + }, + { + "epoch": 0.97, + "learning_rate": 0.0003291255703117605, + "loss": 0.5045, + "step": 503 + }, + { + "epoch": 0.97, + "learning_rate": 0.0003287959520568384, + "loss": 0.491, + "step": 504 + }, + { + "epoch": 0.97, + "learning_rate": 0.00032846573500068136, + "loss": 0.458, + "step": 505 + }, + { + "epoch": 0.97, + "learning_rate": 0.00032813492067854246, + "loss": 0.4508, + "step": 506 + }, + { + "epoch": 0.98, + "learning_rate": 0.0003278035106284516, + "loss": 0.4294, + "step": 507 + }, + { + "epoch": 0.98, + "learning_rate": 0.00032747150639120834, + "loss": 0.4834, + "step": 508 + }, + { + "epoch": 0.98, + "learning_rate": 0.00032713890951037477, + "loss": 0.3857, + "step": 509 + }, + { + "epoch": 0.98, + "learning_rate": 0.00032680572153226834, + "loss": 0.4072, + "step": 510 + }, + { + "epoch": 0.98, + "learning_rate": 0.0003264719440059545, + "loss": 0.4028, + "step": 511 + }, + { + "epoch": 0.99, + "learning_rate": 0.00032613757848323977, + "loss": 0.3789, + "step": 512 + }, + { + "epoch": 0.99, + "learning_rate": 0.00032580262651866446, + "loss": 0.4944, + "step": 513 + }, + { + "epoch": 0.99, + "learning_rate": 0.0003254670896694952, + "loss": 0.4259, + "step": 514 + }, + { + "epoch": 0.99, + "learning_rate": 0.00032513096949571805, + "loss": 0.5037, + "step": 515 + }, + { + "epoch": 0.99, + "learning_rate": 0.00032479426756003093, + "loss": 0.5857, + "step": 516 + }, + { + "epoch": 1.0, + "learning_rate": 0.0003244569854278366, + "loss": 0.5407, + "step": 517 + }, + { + "epoch": 1.0, + "learning_rate": 0.00032411912466723524, + "loss": 0.499, + "step": 518 + }, + { + "epoch": 1.0, + "learning_rate": 0.0003237806868490172, + "loss": 0.4359, + "step": 519 + }, + { + "epoch": 1.0, + "learning_rate": 0.00032344167354665573, + "loss": 0.4374, + "step": 520 + }, + { + "epoch": 1.0, + "learning_rate": 0.0003231020863362997, + "loss": 0.4172, + "step": 521 + }, + { + "epoch": 1.01, + "learning_rate": 0.000322761926796766, + "loss": 0.4451, + "step": 522 + }, + { + "epoch": 1.01, + "learning_rate": 0.0003224211965095326, + "loss": 0.4, + "step": 523 + }, + { + "epoch": 1.01, + "learning_rate": 0.0003220798970587309, + "loss": 0.4009, + "step": 524 + }, + { + "epoch": 1.01, + "learning_rate": 0.0003217380300311386, + "loss": 0.3966, + "step": 525 + }, + { + "epoch": 1.01, + "learning_rate": 0.000321395597016172, + "loss": 0.4255, + "step": 526 + }, + { + "epoch": 1.01, + "learning_rate": 0.00032105259960587895, + "loss": 0.4707, + "step": 527 + }, + { + "epoch": 1.02, + "learning_rate": 0.00032070903939493124, + "loss": 0.5313, + "step": 528 + }, + { + "epoch": 1.02, + "learning_rate": 0.0003203649179806172, + "loss": 0.3596, + "step": 529 + }, + { + "epoch": 1.02, + "learning_rate": 0.0003200202369628345, + "loss": 0.5223, + "step": 530 + }, + { + "epoch": 1.02, + "learning_rate": 0.00031967499794408234, + "loss": 0.4146, + "step": 531 + }, + { + "epoch": 1.02, + "learning_rate": 0.00031932920252945423, + "loss": 0.4328, + "step": 532 + }, + { + "epoch": 1.03, + "learning_rate": 0.0003189828523266306, + "loss": 0.4258, + "step": 533 + }, + { + "epoch": 1.03, + "learning_rate": 0.00031863594894587105, + "loss": 0.4457, + "step": 534 + }, + { + "epoch": 1.03, + "learning_rate": 0.0003182884940000072, + "loss": 0.5249, + "step": 535 + }, + { + "epoch": 1.03, + "learning_rate": 0.0003179404891044348, + "loss": 0.4751, + "step": 536 + }, + { + "epoch": 1.03, + "learning_rate": 0.00031759193587710676, + "loss": 0.5378, + "step": 537 + }, + { + "epoch": 1.04, + "learning_rate": 0.00031724283593852497, + "loss": 0.634, + "step": 538 + }, + { + "epoch": 1.04, + "learning_rate": 0.00031689319091173326, + "loss": 0.4298, + "step": 539 + }, + { + "epoch": 1.04, + "learning_rate": 0.00031654300242230977, + "loss": 0.5469, + "step": 540 + }, + { + "epoch": 1.04, + "learning_rate": 0.00031619227209835917, + "loss": 0.5153, + "step": 541 + }, + { + "epoch": 1.04, + "learning_rate": 0.0003158410015705053, + "loss": 0.4144, + "step": 542 + }, + { + "epoch": 1.05, + "learning_rate": 0.0003154891924718837, + "loss": 0.6041, + "step": 543 + }, + { + "epoch": 1.05, + "learning_rate": 0.0003151368464381335, + "loss": 0.4891, + "step": 544 + }, + { + "epoch": 1.05, + "learning_rate": 0.0003147839651073904, + "loss": 0.5258, + "step": 545 + }, + { + "epoch": 1.05, + "learning_rate": 0.00031443055012027874, + "loss": 0.4351, + "step": 546 + }, + { + "epoch": 1.05, + "learning_rate": 0.000314076603119904, + "loss": 0.4556, + "step": 547 + }, + { + "epoch": 1.06, + "learning_rate": 0.00031372212575184514, + "loss": 0.5445, + "step": 548 + }, + { + "epoch": 1.06, + "learning_rate": 0.00031336711966414675, + "loss": 0.5585, + "step": 549 + }, + { + "epoch": 1.06, + "learning_rate": 0.0003130115865073117, + "loss": 0.367, + "step": 550 + }, + { + "epoch": 1.06, + "learning_rate": 0.0003126555279342933, + "loss": 0.4877, + "step": 551 + }, + { + "epoch": 1.06, + "learning_rate": 0.0003122989456004876, + "loss": 0.4335, + "step": 552 + }, + { + "epoch": 1.07, + "learning_rate": 0.0003119418411637258, + "loss": 0.4383, + "step": 553 + }, + { + "epoch": 1.07, + "learning_rate": 0.0003115842162842663, + "loss": 0.4508, + "step": 554 + }, + { + "epoch": 1.07, + "learning_rate": 0.00031122607262478743, + "loss": 0.4631, + "step": 555 + }, + { + "epoch": 1.07, + "learning_rate": 0.0003108674118503793, + "loss": 0.3496, + "step": 556 + }, + { + "epoch": 1.07, + "learning_rate": 0.0003105082356285361, + "loss": 0.4108, + "step": 557 + }, + { + "epoch": 1.08, + "learning_rate": 0.0003101485456291486, + "loss": 0.4877, + "step": 558 + }, + { + "epoch": 1.08, + "learning_rate": 0.00030978834352449614, + "loss": 0.3696, + "step": 559 + }, + { + "epoch": 1.08, + "learning_rate": 0.00030942763098923913, + "loss": 0.5138, + "step": 560 + }, + { + "epoch": 1.08, + "learning_rate": 0.00030906640970041084, + "loss": 0.5961, + "step": 561 + }, + { + "epoch": 1.08, + "learning_rate": 0.0003087046813374099, + "loss": 0.3824, + "step": 562 + }, + { + "epoch": 1.08, + "learning_rate": 0.00030834244758199276, + "loss": 0.4925, + "step": 563 + }, + { + "epoch": 1.09, + "learning_rate": 0.000307979710118265, + "loss": 0.4511, + "step": 564 + }, + { + "epoch": 1.09, + "learning_rate": 0.00030761647063267457, + "loss": 0.4306, + "step": 565 + }, + { + "epoch": 1.09, + "learning_rate": 0.0003072527308140031, + "loss": 0.468, + "step": 566 + }, + { + "epoch": 1.09, + "learning_rate": 0.00030688849235335856, + "loss": 0.4842, + "step": 567 + }, + { + "epoch": 1.09, + "learning_rate": 0.0003065237569441671, + "loss": 0.4332, + "step": 568 + }, + { + "epoch": 1.1, + "learning_rate": 0.00030615852628216537, + "loss": 0.4637, + "step": 569 + }, + { + "epoch": 1.1, + "learning_rate": 0.0003057928020653925, + "loss": 0.6193, + "step": 570 + }, + { + "epoch": 1.1, + "learning_rate": 0.0003054265859941824, + "loss": 0.5033, + "step": 571 + }, + { + "epoch": 1.1, + "learning_rate": 0.00030505987977115555, + "loss": 0.4185, + "step": 572 + }, + { + "epoch": 1.1, + "learning_rate": 0.0003046926851012114, + "loss": 0.4211, + "step": 573 + }, + { + "epoch": 1.11, + "learning_rate": 0.0003043250036915201, + "loss": 0.5089, + "step": 574 + }, + { + "epoch": 1.11, + "learning_rate": 0.00030395683725151505, + "loss": 0.517, + "step": 575 + }, + { + "epoch": 1.11, + "learning_rate": 0.0003035881874928845, + "loss": 0.492, + "step": 576 + }, + { + "epoch": 1.11, + "learning_rate": 0.0003032190561295636, + "loss": 0.4535, + "step": 577 + }, + { + "epoch": 1.11, + "learning_rate": 0.0003028494448777269, + "loss": 0.3947, + "step": 578 + }, + { + "epoch": 1.12, + "learning_rate": 0.00030247935545577986, + "loss": 0.3125, + "step": 579 + }, + { + "epoch": 1.12, + "learning_rate": 0.0003021087895843511, + "loss": 0.3882, + "step": 580 + }, + { + "epoch": 1.12, + "learning_rate": 0.0003017377489862845, + "loss": 0.4802, + "step": 581 + }, + { + "epoch": 1.12, + "learning_rate": 0.00030136623538663083, + "loss": 0.4652, + "step": 582 + }, + { + "epoch": 1.12, + "learning_rate": 0.00030099425051263994, + "loss": 0.3816, + "step": 583 + }, + { + "epoch": 1.13, + "learning_rate": 0.0003006217960937529, + "loss": 0.4583, + "step": 584 + }, + { + "epoch": 1.13, + "learning_rate": 0.00030024887386159385, + "loss": 0.4568, + "step": 585 + }, + { + "epoch": 1.13, + "learning_rate": 0.00029987548554996174, + "loss": 0.3908, + "step": 586 + }, + { + "epoch": 1.13, + "learning_rate": 0.0002995016328948225, + "loss": 0.4235, + "step": 587 + }, + { + "epoch": 1.13, + "learning_rate": 0.00029912731763430075, + "loss": 0.4138, + "step": 588 + }, + { + "epoch": 1.13, + "learning_rate": 0.00029875254150867216, + "loss": 0.5838, + "step": 589 + }, + { + "epoch": 1.14, + "learning_rate": 0.0002983773062603548, + "loss": 0.462, + "step": 590 + }, + { + "epoch": 1.14, + "learning_rate": 0.00029800161363390145, + "loss": 0.4632, + "step": 591 + }, + { + "epoch": 1.14, + "learning_rate": 0.00029762546537599125, + "loss": 0.5898, + "step": 592 + }, + { + "epoch": 1.14, + "learning_rate": 0.0002972488632354218, + "loss": 0.4742, + "step": 593 + }, + { + "epoch": 1.14, + "learning_rate": 0.00029687180896310065, + "loss": 0.4579, + "step": 594 + }, + { + "epoch": 1.15, + "learning_rate": 0.0002964943043120378, + "loss": 0.5514, + "step": 595 + }, + { + "epoch": 1.15, + "learning_rate": 0.00029611635103733675, + "loss": 0.4304, + "step": 596 + }, + { + "epoch": 1.15, + "learning_rate": 0.0002957379508961871, + "loss": 0.4383, + "step": 597 + }, + { + "epoch": 1.15, + "learning_rate": 0.00029535910564785584, + "loss": 0.5327, + "step": 598 + }, + { + "epoch": 1.15, + "learning_rate": 0.00029497981705367933, + "loss": 0.4781, + "step": 599 + }, + { + "epoch": 1.16, + "learning_rate": 0.00029460008687705525, + "loss": 0.4178, + "step": 600 + }, + { + "epoch": 1.16, + "learning_rate": 0.0002942199168834342, + "loss": 0.3987, + "step": 601 + }, + { + "epoch": 1.16, + "learning_rate": 0.00029383930884031183, + "loss": 0.3861, + "step": 602 + }, + { + "epoch": 1.16, + "learning_rate": 0.00029345826451722005, + "loss": 0.5322, + "step": 603 + }, + { + "epoch": 1.16, + "learning_rate": 0.00029307678568571936, + "loss": 0.3997, + "step": 604 + }, + { + "epoch": 1.17, + "learning_rate": 0.0002926948741193903, + "loss": 0.4121, + "step": 605 + }, + { + "epoch": 1.17, + "learning_rate": 0.00029231253159382514, + "loss": 0.4931, + "step": 606 + }, + { + "epoch": 1.17, + "learning_rate": 0.00029192975988662017, + "loss": 0.4626, + "step": 607 + }, + { + "epoch": 1.17, + "learning_rate": 0.00029154656077736666, + "loss": 0.4441, + "step": 608 + }, + { + "epoch": 1.17, + "learning_rate": 0.0002911629360476432, + "loss": 0.3863, + "step": 609 + }, + { + "epoch": 1.18, + "learning_rate": 0.00029077888748100703, + "loss": 0.36, + "step": 610 + }, + { + "epoch": 1.18, + "learning_rate": 0.00029039441686298594, + "loss": 0.4246, + "step": 611 + }, + { + "epoch": 1.18, + "learning_rate": 0.0002900095259810702, + "loss": 0.4916, + "step": 612 + }, + { + "epoch": 1.18, + "learning_rate": 0.00028962421662470346, + "loss": 0.4896, + "step": 613 + }, + { + "epoch": 1.18, + "learning_rate": 0.00028923849058527535, + "loss": 0.4237, + "step": 614 + }, + { + "epoch": 1.18, + "learning_rate": 0.00028885234965611274, + "loss": 0.5727, + "step": 615 + }, + { + "epoch": 1.19, + "learning_rate": 0.00028846579563247116, + "loss": 0.5681, + "step": 616 + }, + { + "epoch": 1.19, + "learning_rate": 0.0002880788303115269, + "loss": 0.4383, + "step": 617 + }, + { + "epoch": 1.19, + "learning_rate": 0.00028769145549236845, + "loss": 0.4962, + "step": 618 + }, + { + "epoch": 1.19, + "learning_rate": 0.0002873036729759881, + "loss": 0.5472, + "step": 619 + }, + { + "epoch": 1.19, + "learning_rate": 0.0002869154845652738, + "loss": 0.5431, + "step": 620 + }, + { + "epoch": 1.2, + "learning_rate": 0.0002865268920650003, + "loss": 0.4152, + "step": 621 + }, + { + "epoch": 1.2, + "learning_rate": 0.0002861378972818211, + "loss": 0.3922, + "step": 622 + }, + { + "epoch": 1.2, + "learning_rate": 0.0002857485020242602, + "loss": 0.5129, + "step": 623 + }, + { + "epoch": 1.2, + "learning_rate": 0.0002853587081027034, + "loss": 0.4328, + "step": 624 + }, + { + "epoch": 1.2, + "learning_rate": 0.00028496851732938997, + "loss": 0.4431, + "step": 625 + }, + { + "epoch": 1.21, + "learning_rate": 0.0002845779315184042, + "loss": 0.4968, + "step": 626 + }, + { + "epoch": 1.21, + "learning_rate": 0.000284186952485667, + "loss": 0.5301, + "step": 627 + }, + { + "epoch": 1.21, + "learning_rate": 0.0002837955820489276, + "loss": 0.4332, + "step": 628 + }, + { + "epoch": 1.21, + "learning_rate": 0.0002834038220277546, + "loss": 0.4245, + "step": 629 + }, + { + "epoch": 1.21, + "learning_rate": 0.00028301167424352836, + "loss": 0.5057, + "step": 630 + }, + { + "epoch": 1.22, + "learning_rate": 0.00028261914051943166, + "loss": 0.4623, + "step": 631 + }, + { + "epoch": 1.22, + "learning_rate": 0.00028222622268044174, + "loss": 0.5452, + "step": 632 + }, + { + "epoch": 1.22, + "learning_rate": 0.00028183292255332164, + "loss": 0.5238, + "step": 633 + }, + { + "epoch": 1.22, + "learning_rate": 0.00028143924196661176, + "loss": 0.3966, + "step": 634 + }, + { + "epoch": 1.22, + "learning_rate": 0.0002810451827506214, + "loss": 0.35, + "step": 635 + }, + { + "epoch": 1.23, + "learning_rate": 0.00028065074673742007, + "loss": 0.4325, + "step": 636 + }, + { + "epoch": 1.23, + "learning_rate": 0.0002802559357608292, + "loss": 0.4854, + "step": 637 + }, + { + "epoch": 1.23, + "learning_rate": 0.00027986075165641343, + "loss": 0.4254, + "step": 638 + }, + { + "epoch": 1.23, + "learning_rate": 0.00027946519626147225, + "loss": 0.4614, + "step": 639 + }, + { + "epoch": 1.23, + "learning_rate": 0.00027906927141503125, + "loss": 0.3798, + "step": 640 + }, + { + "epoch": 1.23, + "learning_rate": 0.00027867297895783373, + "loss": 0.4742, + "step": 641 + }, + { + "epoch": 1.24, + "learning_rate": 0.0002782763207323322, + "loss": 0.4007, + "step": 642 + }, + { + "epoch": 1.24, + "learning_rate": 0.0002778792985826795, + "loss": 0.4383, + "step": 643 + }, + { + "epoch": 1.24, + "learning_rate": 0.0002774819143547206, + "loss": 0.4298, + "step": 644 + }, + { + "epoch": 1.24, + "learning_rate": 0.00027708416989598387, + "loss": 0.5178, + "step": 645 + }, + { + "epoch": 1.24, + "learning_rate": 0.0002766860670556722, + "loss": 0.3434, + "step": 646 + }, + { + "epoch": 1.25, + "learning_rate": 0.0002762876076846551, + "loss": 0.3862, + "step": 647 + }, + { + "epoch": 1.25, + "learning_rate": 0.00027588879363545934, + "loss": 0.4459, + "step": 648 + }, + { + "epoch": 1.25, + "learning_rate": 0.0002754896267622608, + "loss": 0.3934, + "step": 649 + }, + { + "epoch": 1.25, + "learning_rate": 0.00027509010892087565, + "loss": 0.4349, + "step": 650 + }, + { + "epoch": 1.25, + "learning_rate": 0.000274690241968752, + "loss": 0.4178, + "step": 651 + }, + { + "epoch": 1.26, + "learning_rate": 0.0002742900277649607, + "loss": 0.4151, + "step": 652 + }, + { + "epoch": 1.26, + "learning_rate": 0.0002738894681701874, + "loss": 0.3888, + "step": 653 + }, + { + "epoch": 1.26, + "learning_rate": 0.00027348856504672323, + "loss": 0.4214, + "step": 654 + }, + { + "epoch": 1.26, + "learning_rate": 0.0002730873202584567, + "loss": 0.519, + "step": 655 + }, + { + "epoch": 1.26, + "learning_rate": 0.00027268573567086477, + "loss": 0.5463, + "step": 656 + }, + { + "epoch": 1.27, + "learning_rate": 0.00027228381315100417, + "loss": 0.3367, + "step": 657 + }, + { + "epoch": 1.27, + "learning_rate": 0.00027188155456750256, + "loss": 0.4629, + "step": 658 + }, + { + "epoch": 1.27, + "learning_rate": 0.00027147896179055043, + "loss": 0.4456, + "step": 659 + }, + { + "epoch": 1.27, + "learning_rate": 0.0002710760366918917, + "loss": 0.4348, + "step": 660 + }, + { + "epoch": 1.27, + "learning_rate": 0.0002706727811448153, + "loss": 0.4505, + "step": 661 + }, + { + "epoch": 1.28, + "learning_rate": 0.0002702691970241468, + "loss": 0.5028, + "step": 662 + }, + { + "epoch": 1.28, + "learning_rate": 0.00026986528620623904, + "loss": 0.5257, + "step": 663 + }, + { + "epoch": 1.28, + "learning_rate": 0.00026946105056896403, + "loss": 0.4977, + "step": 664 + }, + { + "epoch": 1.28, + "learning_rate": 0.00026905649199170377, + "loss": 0.421, + "step": 665 + }, + { + "epoch": 1.28, + "learning_rate": 0.0002686516123553417, + "loss": 0.4931, + "step": 666 + }, + { + "epoch": 1.28, + "learning_rate": 0.00026824641354225397, + "loss": 0.5818, + "step": 667 + }, + { + "epoch": 1.29, + "learning_rate": 0.0002678408974363005, + "loss": 0.4211, + "step": 668 + }, + { + "epoch": 1.29, + "learning_rate": 0.00026743506592281674, + "loss": 0.5182, + "step": 669 + }, + { + "epoch": 1.29, + "learning_rate": 0.00026702892088860413, + "loss": 0.5591, + "step": 670 + }, + { + "epoch": 1.29, + "learning_rate": 0.0002666224642219221, + "loss": 0.5363, + "step": 671 + }, + { + "epoch": 1.29, + "learning_rate": 0.0002662156978124786, + "loss": 0.5866, + "step": 672 + }, + { + "epoch": 1.3, + "learning_rate": 0.0002658086235514218, + "loss": 0.422, + "step": 673 + }, + { + "epoch": 1.3, + "learning_rate": 0.0002654012433313312, + "loss": 0.5375, + "step": 674 + }, + { + "epoch": 1.3, + "learning_rate": 0.0002649935590462087, + "loss": 0.4752, + "step": 675 + }, + { + "epoch": 1.3, + "learning_rate": 0.00026458557259146986, + "loss": 0.4271, + "step": 676 + }, + { + "epoch": 1.3, + "learning_rate": 0.0002641772858639351, + "loss": 0.4843, + "step": 677 + }, + { + "epoch": 1.31, + "learning_rate": 0.00026376870076182086, + "loss": 0.4827, + "step": 678 + }, + { + "epoch": 1.31, + "learning_rate": 0.00026335981918473086, + "loss": 0.47, + "step": 679 + }, + { + "epoch": 1.31, + "learning_rate": 0.0002629506430336472, + "loss": 0.368, + "step": 680 + }, + { + "epoch": 1.31, + "learning_rate": 0.00026254117421092133, + "loss": 0.481, + "step": 681 + }, + { + "epoch": 1.31, + "learning_rate": 0.0002621314146202656, + "loss": 0.4153, + "step": 682 + }, + { + "epoch": 1.32, + "learning_rate": 0.0002617213661667443, + "loss": 0.4397, + "step": 683 + }, + { + "epoch": 1.32, + "learning_rate": 0.0002613110307567643, + "loss": 0.4052, + "step": 684 + }, + { + "epoch": 1.32, + "learning_rate": 0.00026090041029806695, + "loss": 0.4652, + "step": 685 + }, + { + "epoch": 1.32, + "learning_rate": 0.00026048950669971884, + "loss": 0.3826, + "step": 686 + }, + { + "epoch": 1.32, + "learning_rate": 0.00026007832187210277, + "loss": 0.5639, + "step": 687 + }, + { + "epoch": 1.33, + "learning_rate": 0.00025966685772690906, + "loss": 0.3917, + "step": 688 + }, + { + "epoch": 1.33, + "learning_rate": 0.00025925511617712685, + "loss": 0.5248, + "step": 689 + }, + { + "epoch": 1.33, + "learning_rate": 0.0002588430991370347, + "loss": 0.3796, + "step": 690 + }, + { + "epoch": 1.33, + "learning_rate": 0.0002584308085221922, + "loss": 0.4391, + "step": 691 + }, + { + "epoch": 1.33, + "learning_rate": 0.00025801824624943084, + "loss": 0.4514, + "step": 692 + }, + { + "epoch": 1.34, + "learning_rate": 0.00025760541423684496, + "loss": 0.5046, + "step": 693 + }, + { + "epoch": 1.34, + "learning_rate": 0.0002571923144037831, + "loss": 0.4578, + "step": 694 + }, + { + "epoch": 1.34, + "learning_rate": 0.0002567789486708389, + "loss": 0.4681, + "step": 695 + }, + { + "epoch": 1.34, + "learning_rate": 0.00025636531895984236, + "loss": 0.4501, + "step": 696 + }, + { + "epoch": 1.34, + "learning_rate": 0.0002559514271938506, + "loss": 0.4411, + "step": 697 + }, + { + "epoch": 1.34, + "learning_rate": 0.00025553727529713916, + "loss": 0.401, + "step": 698 + }, + { + "epoch": 1.35, + "learning_rate": 0.00025512286519519293, + "loss": 0.4911, + "step": 699 + }, + { + "epoch": 1.35, + "learning_rate": 0.0002547081988146974, + "loss": 0.3754, + "step": 700 + }, + { + "epoch": 1.35, + "learning_rate": 0.00025429327808352946, + "loss": 0.3807, + "step": 701 + }, + { + "epoch": 1.35, + "learning_rate": 0.0002538781049307486, + "loss": 0.4193, + "step": 702 + }, + { + "epoch": 1.35, + "learning_rate": 0.0002534626812865876, + "loss": 0.5259, + "step": 703 + }, + { + "epoch": 1.36, + "learning_rate": 0.00025304700908244433, + "loss": 0.3684, + "step": 704 + }, + { + "epoch": 1.36, + "learning_rate": 0.0002526310902508718, + "loss": 0.5423, + "step": 705 + }, + { + "epoch": 1.36, + "learning_rate": 0.0002522149267255699, + "loss": 0.4288, + "step": 706 + }, + { + "epoch": 1.36, + "learning_rate": 0.000251798520441376, + "loss": 0.5046, + "step": 707 + }, + { + "epoch": 1.36, + "learning_rate": 0.0002513818733342564, + "loss": 0.3777, + "step": 708 + }, + { + "epoch": 1.37, + "learning_rate": 0.00025096498734129667, + "loss": 0.5171, + "step": 709 + }, + { + "epoch": 1.37, + "learning_rate": 0.0002505478644006932, + "loss": 0.3785, + "step": 710 + }, + { + "epoch": 1.37, + "learning_rate": 0.00025013050645174414, + "loss": 0.5413, + "step": 711 + }, + { + "epoch": 1.37, + "learning_rate": 0.00024971291543483994, + "loss": 0.5018, + "step": 712 + }, + { + "epoch": 1.37, + "learning_rate": 0.00024929509329145477, + "loss": 0.5212, + "step": 713 + }, + { + "epoch": 1.38, + "learning_rate": 0.00024887704196413746, + "loss": 0.483, + "step": 714 + }, + { + "epoch": 1.38, + "learning_rate": 0.0002484587633965023, + "loss": 0.3684, + "step": 715 + }, + { + "epoch": 1.38, + "learning_rate": 0.00024804025953322005, + "loss": 0.3782, + "step": 716 + }, + { + "epoch": 1.38, + "learning_rate": 0.00024762153232000877, + "loss": 0.4995, + "step": 717 + }, + { + "epoch": 1.38, + "learning_rate": 0.0002472025837036253, + "loss": 0.4324, + "step": 718 + }, + { + "epoch": 1.39, + "learning_rate": 0.0002467834156318555, + "loss": 0.5203, + "step": 719 + }, + { + "epoch": 1.39, + "learning_rate": 0.0002463640300535057, + "loss": 0.423, + "step": 720 + }, + { + "epoch": 1.39, + "learning_rate": 0.0002459444289183933, + "loss": 0.4537, + "step": 721 + }, + { + "epoch": 1.39, + "learning_rate": 0.00024552461417733817, + "loss": 0.4124, + "step": 722 + }, + { + "epoch": 1.39, + "learning_rate": 0.0002451045877821528, + "loss": 0.4865, + "step": 723 + }, + { + "epoch": 1.39, + "learning_rate": 0.0002446843516856343, + "loss": 0.4845, + "step": 724 + }, + { + "epoch": 1.4, + "learning_rate": 0.00024426390784155425, + "loss": 0.4174, + "step": 725 + }, + { + "epoch": 1.4, + "learning_rate": 0.00024384325820465033, + "loss": 0.4456, + "step": 726 + }, + { + "epoch": 1.4, + "learning_rate": 0.0002434224047306169, + "loss": 0.4429, + "step": 727 + }, + { + "epoch": 1.4, + "learning_rate": 0.0002430013493760961, + "loss": 0.363, + "step": 728 + }, + { + "epoch": 1.4, + "learning_rate": 0.00024258009409866853, + "loss": 0.4769, + "step": 729 + }, + { + "epoch": 1.41, + "learning_rate": 0.00024215864085684442, + "loss": 0.4597, + "step": 730 + }, + { + "epoch": 1.41, + "learning_rate": 0.00024173699161005429, + "loss": 0.366, + "step": 731 + }, + { + "epoch": 1.41, + "learning_rate": 0.00024131514831863995, + "loss": 0.4746, + "step": 732 + }, + { + "epoch": 1.41, + "learning_rate": 0.0002408931129438453, + "loss": 0.5608, + "step": 733 + }, + { + "epoch": 1.41, + "learning_rate": 0.00024047088744780744, + "loss": 0.4292, + "step": 734 + }, + { + "epoch": 1.42, + "learning_rate": 0.00024004847379354726, + "loss": 0.4743, + "step": 735 + }, + { + "epoch": 1.42, + "learning_rate": 0.00023962587394496038, + "loss": 0.3855, + "step": 736 + }, + { + "epoch": 1.42, + "learning_rate": 0.00023920308986680834, + "loss": 0.4573, + "step": 737 + }, + { + "epoch": 1.42, + "learning_rate": 0.00023878012352470892, + "loss": 0.3937, + "step": 738 + }, + { + "epoch": 1.42, + "learning_rate": 0.0002383569768851274, + "loss": 0.4371, + "step": 739 + }, + { + "epoch": 1.43, + "learning_rate": 0.00023793365191536735, + "loss": 0.5432, + "step": 740 + }, + { + "epoch": 1.43, + "learning_rate": 0.00023751015058356135, + "loss": 0.4803, + "step": 741 + }, + { + "epoch": 1.43, + "learning_rate": 0.000237086474858662, + "loss": 0.4281, + "step": 742 + }, + { + "epoch": 1.43, + "learning_rate": 0.00023666262671043263, + "loss": 0.4031, + "step": 743 + }, + { + "epoch": 1.43, + "learning_rate": 0.00023623860810943826, + "loss": 0.4725, + "step": 744 + }, + { + "epoch": 1.44, + "learning_rate": 0.0002358144210270364, + "loss": 0.4644, + "step": 745 + }, + { + "epoch": 1.44, + "learning_rate": 0.00023539006743536774, + "loss": 0.4848, + "step": 746 + }, + { + "epoch": 1.44, + "learning_rate": 0.00023496554930734718, + "loss": 0.4084, + "step": 747 + }, + { + "epoch": 1.44, + "learning_rate": 0.00023454086861665472, + "loss": 0.4322, + "step": 748 + }, + { + "epoch": 1.44, + "learning_rate": 0.00023411602733772595, + "loss": 0.4847, + "step": 749 + }, + { + "epoch": 1.44, + "learning_rate": 0.00023369102744574312, + "loss": 0.4298, + "step": 750 + } + ], + "logging_steps": 1, + "max_steps": 1557, + "num_train_epochs": 3, + "save_steps": 50, + "total_flos": 1.0028969008307896e+18, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-750/training_args.bin b/checkpoint-750/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..cbdf20491848d40e9a89bca19c6229b4b2b55e5d --- /dev/null +++ b/checkpoint-750/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:07819caee47f45203545f962678d52b5954ff1fd4afe1d5152fad48004402099 +size 4155 diff --git a/checkpoint-800/README.md b/checkpoint-800/README.md new file mode 100644 index 0000000000000000000000000000000000000000..b32efe7366f05d1d90816d2ad9e4b06ccca46bea --- /dev/null +++ b/checkpoint-800/README.md @@ -0,0 +1,219 @@ +--- +library_name: peft +base_model: mistralai/Mistral-7B-v0.1 +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] + + +## Training procedure + + +The following `bitsandbytes` quantization config was used during training: +- quant_method: bitsandbytes +- load_in_8bit: False +- load_in_4bit: True +- llm_int8_threshold: 6.0 +- llm_int8_skip_modules: None +- llm_int8_enable_fp32_cpu_offload: False +- llm_int8_has_fp16_weight: False +- bnb_4bit_quant_type: nf4 +- bnb_4bit_use_double_quant: True +- bnb_4bit_compute_dtype: float16 + +### Framework versions + + +- PEFT 0.6.0.dev0 diff --git a/checkpoint-800/adapter_config.json b/checkpoint-800/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..4e108f2da037ef6250457c67a4bedd308d97303c --- /dev/null +++ b/checkpoint-800/adapter_config.json @@ -0,0 +1,24 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "mistralai/Mistral-7B-v0.1", + "bias": "none", + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layers_pattern": null, + "layers_to_transform": null, + "lora_alpha": 16, + "lora_dropout": 0.05, + "modules_to_save": null, + "peft_type": "LORA", + "r": 16, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "gate_proj", + "down_proj", + "up_proj" + ], + "task_type": "CAUSAL_LM" +} \ No newline at end of file diff --git a/checkpoint-800/adapter_model.bin b/checkpoint-800/adapter_model.bin new file mode 100644 index 0000000000000000000000000000000000000000..7befcae362857d17a14c827481a23f31fbca9141 --- /dev/null +++ b/checkpoint-800/adapter_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3e3301ab4b78b8591003f6fa1b7f17c88567d3174caaac84f4f8070f236f32f1 +size 113314765 diff --git a/checkpoint-800/optimizer.pt b/checkpoint-800/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..f4d39305f658175907dc3a842d7a3e66c73174c7 --- /dev/null +++ b/checkpoint-800/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0debffdaf52926aebb9acfd67cb9e51dd2e73647f5a198ae6a54038b017ee81a +size 226653957 diff --git a/checkpoint-800/rng_state.pth b/checkpoint-800/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..79ff41d4f75c9c761b8d216f548088000567ba5d --- /dev/null +++ b/checkpoint-800/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a67ada9a2f7ba8a9056551a898e8d4dffa8326b6a4cc89d38d13d12d8d808021 +size 14575 diff --git a/checkpoint-800/scheduler.pt b/checkpoint-800/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..97f1b41efe321c456bbbba2f102b8b77225b6631 --- /dev/null +++ b/checkpoint-800/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f308f58f5d6fd593a960d76cee3a327399d8ab47d316d9f148d9ef0c6dd55e26 +size 627 diff --git a/checkpoint-800/trainer_state.json b/checkpoint-800/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..f422e5050d1ff6a6b51a7358f7676aa3fd506a0f --- /dev/null +++ b/checkpoint-800/trainer_state.json @@ -0,0 +1,4819 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 1.5411000120351426, + "eval_steps": 500, + "global_step": 800, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0, + "learning_rate": 4.000000000000001e-06, + "loss": 0.6869, + "step": 1 + }, + { + "epoch": 0.0, + "learning_rate": 8.000000000000001e-06, + "loss": 0.8396, + "step": 2 + }, + { + "epoch": 0.01, + "learning_rate": 1.2e-05, + "loss": 0.7489, + "step": 3 + }, + { + "epoch": 0.01, + "learning_rate": 1.6000000000000003e-05, + "loss": 0.7252, + "step": 4 + }, + { + "epoch": 0.01, + "learning_rate": 2e-05, + "loss": 0.6548, + "step": 5 + }, + { + "epoch": 0.01, + "learning_rate": 2.4e-05, + "loss": 0.8022, + "step": 6 + }, + { + "epoch": 0.01, + "learning_rate": 2.8000000000000003e-05, + "loss": 0.6524, + "step": 7 + }, + { + "epoch": 0.02, + "learning_rate": 3.2000000000000005e-05, + "loss": 0.6981, + "step": 8 + }, + { + "epoch": 0.02, + "learning_rate": 3.6e-05, + "loss": 0.7488, + "step": 9 + }, + { + "epoch": 0.02, + "learning_rate": 4e-05, + "loss": 0.6368, + "step": 10 + }, + { + "epoch": 0.02, + "learning_rate": 4.4000000000000006e-05, + "loss": 0.6891, + "step": 11 + }, + { + "epoch": 0.02, + "learning_rate": 4.8e-05, + "loss": 0.7968, + "step": 12 + }, + { + "epoch": 0.03, + "learning_rate": 5.2000000000000004e-05, + "loss": 0.6912, + "step": 13 + }, + { + "epoch": 0.03, + "learning_rate": 5.6000000000000006e-05, + "loss": 0.8452, + "step": 14 + }, + { + "epoch": 0.03, + "learning_rate": 6e-05, + "loss": 0.6989, + "step": 15 + }, + { + "epoch": 0.03, + "learning_rate": 6.400000000000001e-05, + "loss": 0.6685, + "step": 16 + }, + { + "epoch": 0.03, + "learning_rate": 6.800000000000001e-05, + "loss": 0.5469, + "step": 17 + }, + { + "epoch": 0.03, + "learning_rate": 7.2e-05, + "loss": 0.7915, + "step": 18 + }, + { + "epoch": 0.04, + "learning_rate": 7.6e-05, + "loss": 0.7744, + "step": 19 + }, + { + "epoch": 0.04, + "learning_rate": 8e-05, + "loss": 0.6804, + "step": 20 + }, + { + "epoch": 0.04, + "learning_rate": 8.4e-05, + "loss": 0.7796, + "step": 21 + }, + { + "epoch": 0.04, + "learning_rate": 8.800000000000001e-05, + "loss": 0.706, + "step": 22 + }, + { + "epoch": 0.04, + "learning_rate": 9.200000000000001e-05, + "loss": 0.6798, + "step": 23 + }, + { + "epoch": 0.05, + "learning_rate": 9.6e-05, + "loss": 0.6333, + "step": 24 + }, + { + "epoch": 0.05, + "learning_rate": 0.0001, + "loss": 0.6012, + "step": 25 + }, + { + "epoch": 0.05, + "learning_rate": 0.00010400000000000001, + "loss": 0.52, + "step": 26 + }, + { + "epoch": 0.05, + "learning_rate": 0.00010800000000000001, + "loss": 0.6583, + "step": 27 + }, + { + "epoch": 0.05, + "learning_rate": 0.00011200000000000001, + "loss": 0.7354, + "step": 28 + }, + { + "epoch": 0.06, + "learning_rate": 0.000116, + "loss": 0.6296, + "step": 29 + }, + { + "epoch": 0.06, + "learning_rate": 0.00012, + "loss": 0.6352, + "step": 30 + }, + { + "epoch": 0.06, + "learning_rate": 0.000124, + "loss": 0.6007, + "step": 31 + }, + { + "epoch": 0.06, + "learning_rate": 0.00012800000000000002, + "loss": 0.5659, + "step": 32 + }, + { + "epoch": 0.06, + "learning_rate": 0.000132, + "loss": 0.5138, + "step": 33 + }, + { + "epoch": 0.07, + "learning_rate": 0.00013600000000000003, + "loss": 0.6639, + "step": 34 + }, + { + "epoch": 0.07, + "learning_rate": 0.00014, + "loss": 0.5934, + "step": 35 + }, + { + "epoch": 0.07, + "learning_rate": 0.000144, + "loss": 0.5233, + "step": 36 + }, + { + "epoch": 0.07, + "learning_rate": 0.000148, + "loss": 0.5307, + "step": 37 + }, + { + "epoch": 0.07, + "learning_rate": 0.000152, + "loss": 0.5928, + "step": 38 + }, + { + "epoch": 0.08, + "learning_rate": 0.00015600000000000002, + "loss": 0.5908, + "step": 39 + }, + { + "epoch": 0.08, + "learning_rate": 0.00016, + "loss": 0.6366, + "step": 40 + }, + { + "epoch": 0.08, + "learning_rate": 0.000164, + "loss": 0.5972, + "step": 41 + }, + { + "epoch": 0.08, + "learning_rate": 0.000168, + "loss": 0.4825, + "step": 42 + }, + { + "epoch": 0.08, + "learning_rate": 0.000172, + "loss": 0.6783, + "step": 43 + }, + { + "epoch": 0.08, + "learning_rate": 0.00017600000000000002, + "loss": 0.6082, + "step": 44 + }, + { + "epoch": 0.09, + "learning_rate": 0.00018, + "loss": 0.7633, + "step": 45 + }, + { + "epoch": 0.09, + "learning_rate": 0.00018400000000000003, + "loss": 0.5988, + "step": 46 + }, + { + "epoch": 0.09, + "learning_rate": 0.000188, + "loss": 0.6658, + "step": 47 + }, + { + "epoch": 0.09, + "learning_rate": 0.000192, + "loss": 0.5945, + "step": 48 + }, + { + "epoch": 0.09, + "learning_rate": 0.000196, + "loss": 0.5984, + "step": 49 + }, + { + "epoch": 0.1, + "learning_rate": 0.0002, + "loss": 0.6778, + "step": 50 + }, + { + "epoch": 0.1, + "learning_rate": 0.00020400000000000003, + "loss": 0.6057, + "step": 51 + }, + { + "epoch": 0.1, + "learning_rate": 0.00020800000000000001, + "loss": 0.601, + "step": 52 + }, + { + "epoch": 0.1, + "learning_rate": 0.00021200000000000003, + "loss": 0.5566, + "step": 53 + }, + { + "epoch": 0.1, + "learning_rate": 0.00021600000000000002, + "loss": 0.5911, + "step": 54 + }, + { + "epoch": 0.11, + "learning_rate": 0.00022000000000000003, + "loss": 0.7636, + "step": 55 + }, + { + "epoch": 0.11, + "learning_rate": 0.00022400000000000002, + "loss": 0.5537, + "step": 56 + }, + { + "epoch": 0.11, + "learning_rate": 0.00022799999999999999, + "loss": 0.6037, + "step": 57 + }, + { + "epoch": 0.11, + "learning_rate": 0.000232, + "loss": 0.6474, + "step": 58 + }, + { + "epoch": 0.11, + "learning_rate": 0.000236, + "loss": 0.6483, + "step": 59 + }, + { + "epoch": 0.12, + "learning_rate": 0.00024, + "loss": 0.5021, + "step": 60 + }, + { + "epoch": 0.12, + "learning_rate": 0.000244, + "loss": 0.5347, + "step": 61 + }, + { + "epoch": 0.12, + "learning_rate": 0.000248, + "loss": 0.5791, + "step": 62 + }, + { + "epoch": 0.12, + "learning_rate": 0.000252, + "loss": 0.5407, + "step": 63 + }, + { + "epoch": 0.12, + "learning_rate": 0.00025600000000000004, + "loss": 0.5298, + "step": 64 + }, + { + "epoch": 0.13, + "learning_rate": 0.00026000000000000003, + "loss": 0.5685, + "step": 65 + }, + { + "epoch": 0.13, + "learning_rate": 0.000264, + "loss": 0.5108, + "step": 66 + }, + { + "epoch": 0.13, + "learning_rate": 0.000268, + "loss": 0.526, + "step": 67 + }, + { + "epoch": 0.13, + "learning_rate": 0.00027200000000000005, + "loss": 0.6843, + "step": 68 + }, + { + "epoch": 0.13, + "learning_rate": 0.000276, + "loss": 0.6608, + "step": 69 + }, + { + "epoch": 0.13, + "learning_rate": 0.00028, + "loss": 0.5866, + "step": 70 + }, + { + "epoch": 0.14, + "learning_rate": 0.000284, + "loss": 0.6422, + "step": 71 + }, + { + "epoch": 0.14, + "learning_rate": 0.000288, + "loss": 0.449, + "step": 72 + }, + { + "epoch": 0.14, + "learning_rate": 0.000292, + "loss": 0.5319, + "step": 73 + }, + { + "epoch": 0.14, + "learning_rate": 0.000296, + "loss": 0.5977, + "step": 74 + }, + { + "epoch": 0.14, + "learning_rate": 0.00030000000000000003, + "loss": 0.5805, + "step": 75 + }, + { + "epoch": 0.15, + "learning_rate": 0.000304, + "loss": 0.5209, + "step": 76 + }, + { + "epoch": 0.15, + "learning_rate": 0.000308, + "loss": 0.6098, + "step": 77 + }, + { + "epoch": 0.15, + "learning_rate": 0.00031200000000000005, + "loss": 0.4665, + "step": 78 + }, + { + "epoch": 0.15, + "learning_rate": 0.00031600000000000004, + "loss": 0.6882, + "step": 79 + }, + { + "epoch": 0.15, + "learning_rate": 0.00032, + "loss": 0.5427, + "step": 80 + }, + { + "epoch": 0.16, + "learning_rate": 0.000324, + "loss": 0.5345, + "step": 81 + }, + { + "epoch": 0.16, + "learning_rate": 0.000328, + "loss": 0.663, + "step": 82 + }, + { + "epoch": 0.16, + "learning_rate": 0.000332, + "loss": 0.5393, + "step": 83 + }, + { + "epoch": 0.16, + "learning_rate": 0.000336, + "loss": 0.5711, + "step": 84 + }, + { + "epoch": 0.16, + "learning_rate": 0.00034, + "loss": 0.5261, + "step": 85 + }, + { + "epoch": 0.17, + "learning_rate": 0.000344, + "loss": 0.5775, + "step": 86 + }, + { + "epoch": 0.17, + "learning_rate": 0.000348, + "loss": 0.6329, + "step": 87 + }, + { + "epoch": 0.17, + "learning_rate": 0.00035200000000000005, + "loss": 0.4425, + "step": 88 + }, + { + "epoch": 0.17, + "learning_rate": 0.00035600000000000003, + "loss": 0.6837, + "step": 89 + }, + { + "epoch": 0.17, + "learning_rate": 0.00036, + "loss": 0.615, + "step": 90 + }, + { + "epoch": 0.18, + "learning_rate": 0.000364, + "loss": 0.5615, + "step": 91 + }, + { + "epoch": 0.18, + "learning_rate": 0.00036800000000000005, + "loss": 0.5434, + "step": 92 + }, + { + "epoch": 0.18, + "learning_rate": 0.00037200000000000004, + "loss": 0.5864, + "step": 93 + }, + { + "epoch": 0.18, + "learning_rate": 0.000376, + "loss": 0.5583, + "step": 94 + }, + { + "epoch": 0.18, + "learning_rate": 0.00038, + "loss": 0.5299, + "step": 95 + }, + { + "epoch": 0.18, + "learning_rate": 0.000384, + "loss": 0.532, + "step": 96 + }, + { + "epoch": 0.19, + "learning_rate": 0.000388, + "loss": 0.5227, + "step": 97 + }, + { + "epoch": 0.19, + "learning_rate": 0.000392, + "loss": 0.5275, + "step": 98 + }, + { + "epoch": 0.19, + "learning_rate": 0.00039600000000000003, + "loss": 0.4541, + "step": 99 + }, + { + "epoch": 0.19, + "learning_rate": 0.0004, + "loss": 0.6485, + "step": 100 + }, + { + "epoch": 0.19, + "learning_rate": 0.0003999995350775973, + "loss": 0.5438, + "step": 101 + }, + { + "epoch": 0.2, + "learning_rate": 0.00039999814031255063, + "loss": 0.5997, + "step": 102 + }, + { + "epoch": 0.2, + "learning_rate": 0.00039999581571134455, + "loss": 0.5322, + "step": 103 + }, + { + "epoch": 0.2, + "learning_rate": 0.0003999925612847867, + "loss": 0.484, + "step": 104 + }, + { + "epoch": 0.2, + "learning_rate": 0.00039998837704800766, + "loss": 0.5961, + "step": 105 + }, + { + "epoch": 0.2, + "learning_rate": 0.00039998326302046085, + "loss": 0.7405, + "step": 106 + }, + { + "epoch": 0.21, + "learning_rate": 0.00039997721922592255, + "loss": 0.5802, + "step": 107 + }, + { + "epoch": 0.21, + "learning_rate": 0.00039997024569249167, + "loss": 0.769, + "step": 108 + }, + { + "epoch": 0.21, + "learning_rate": 0.0003999623424525898, + "loss": 0.5598, + "step": 109 + }, + { + "epoch": 0.21, + "learning_rate": 0.0003999535095429608, + "loss": 0.6143, + "step": 110 + }, + { + "epoch": 0.21, + "learning_rate": 0.00039994374700467095, + "loss": 0.5766, + "step": 111 + }, + { + "epoch": 0.22, + "learning_rate": 0.00039993305488310836, + "loss": 0.7695, + "step": 112 + }, + { + "epoch": 0.22, + "learning_rate": 0.0003999214332279831, + "loss": 0.7153, + "step": 113 + }, + { + "epoch": 0.22, + "learning_rate": 0.0003999088820933269, + "loss": 0.5835, + "step": 114 + }, + { + "epoch": 0.22, + "learning_rate": 0.00039989540153749286, + "loss": 0.6634, + "step": 115 + }, + { + "epoch": 0.22, + "learning_rate": 0.000399880991623155, + "loss": 0.6069, + "step": 116 + }, + { + "epoch": 0.23, + "learning_rate": 0.0003998656524173082, + "loss": 0.7224, + "step": 117 + }, + { + "epoch": 0.23, + "learning_rate": 0.000399849383991268, + "loss": 0.5884, + "step": 118 + }, + { + "epoch": 0.23, + "learning_rate": 0.0003998321864206699, + "loss": 0.5122, + "step": 119 + }, + { + "epoch": 0.23, + "learning_rate": 0.00039981405978546924, + "loss": 0.6453, + "step": 120 + }, + { + "epoch": 0.23, + "learning_rate": 0.0003997950041699408, + "loss": 0.4665, + "step": 121 + }, + { + "epoch": 0.23, + "learning_rate": 0.0003997750196626785, + "loss": 0.5428, + "step": 122 + }, + { + "epoch": 0.24, + "learning_rate": 0.00039975410635659464, + "loss": 0.4365, + "step": 123 + }, + { + "epoch": 0.24, + "learning_rate": 0.00039973226434891995, + "loss": 0.5978, + "step": 124 + }, + { + "epoch": 0.24, + "learning_rate": 0.00039970949374120286, + "loss": 0.7729, + "step": 125 + }, + { + "epoch": 0.24, + "learning_rate": 0.000399685794639309, + "loss": 0.6212, + "step": 126 + }, + { + "epoch": 0.24, + "learning_rate": 0.00039966116715342066, + "loss": 0.5426, + "step": 127 + }, + { + "epoch": 0.25, + "learning_rate": 0.00039963561139803676, + "loss": 0.5782, + "step": 128 + }, + { + "epoch": 0.25, + "learning_rate": 0.0003996091274919716, + "loss": 0.6701, + "step": 129 + }, + { + "epoch": 0.25, + "learning_rate": 0.0003995817155583548, + "loss": 0.6314, + "step": 130 + }, + { + "epoch": 0.25, + "learning_rate": 0.0003995533757246307, + "loss": 0.6662, + "step": 131 + }, + { + "epoch": 0.25, + "learning_rate": 0.0003995241081225573, + "loss": 0.5192, + "step": 132 + }, + { + "epoch": 0.26, + "learning_rate": 0.0003994939128882065, + "loss": 0.5591, + "step": 133 + }, + { + "epoch": 0.26, + "learning_rate": 0.0003994627901619625, + "loss": 0.5809, + "step": 134 + }, + { + "epoch": 0.26, + "learning_rate": 0.0003994307400885219, + "loss": 0.4871, + "step": 135 + }, + { + "epoch": 0.26, + "learning_rate": 0.0003993977628168928, + "loss": 0.6666, + "step": 136 + }, + { + "epoch": 0.26, + "learning_rate": 0.0003993638585003938, + "loss": 0.6469, + "step": 137 + }, + { + "epoch": 0.27, + "learning_rate": 0.00039932902729665357, + "loss": 0.5727, + "step": 138 + }, + { + "epoch": 0.27, + "learning_rate": 0.00039929326936761036, + "loss": 0.6715, + "step": 139 + }, + { + "epoch": 0.27, + "learning_rate": 0.00039925658487951067, + "loss": 0.5686, + "step": 140 + }, + { + "epoch": 0.27, + "learning_rate": 0.00039921897400290894, + "loss": 0.501, + "step": 141 + }, + { + "epoch": 0.27, + "learning_rate": 0.00039918043691266665, + "loss": 0.5795, + "step": 142 + }, + { + "epoch": 0.28, + "learning_rate": 0.00039914097378795124, + "loss": 0.6287, + "step": 143 + }, + { + "epoch": 0.28, + "learning_rate": 0.00039910058481223564, + "loss": 0.7016, + "step": 144 + }, + { + "epoch": 0.28, + "learning_rate": 0.00039905927017329726, + "loss": 0.6232, + "step": 145 + }, + { + "epoch": 0.28, + "learning_rate": 0.00039901703006321715, + "loss": 0.5291, + "step": 146 + }, + { + "epoch": 0.28, + "learning_rate": 0.00039897386467837903, + "loss": 0.5297, + "step": 147 + }, + { + "epoch": 0.28, + "learning_rate": 0.00039892977421946844, + "loss": 0.5784, + "step": 148 + }, + { + "epoch": 0.29, + "learning_rate": 0.0003988847588914718, + "loss": 0.5714, + "step": 149 + }, + { + "epoch": 0.29, + "learning_rate": 0.0003988388189036754, + "loss": 0.5044, + "step": 150 + }, + { + "epoch": 0.29, + "learning_rate": 0.0003987919544696646, + "loss": 0.8246, + "step": 151 + }, + { + "epoch": 0.29, + "learning_rate": 0.0003987441658073226, + "loss": 0.5048, + "step": 152 + }, + { + "epoch": 0.29, + "learning_rate": 0.0003986954531388297, + "loss": 0.5433, + "step": 153 + }, + { + "epoch": 0.3, + "learning_rate": 0.00039864581669066186, + "loss": 0.5251, + "step": 154 + }, + { + "epoch": 0.3, + "learning_rate": 0.0003985952566935902, + "loss": 0.5708, + "step": 155 + }, + { + "epoch": 0.3, + "learning_rate": 0.00039854377338267936, + "loss": 0.6276, + "step": 156 + }, + { + "epoch": 0.3, + "learning_rate": 0.00039849136699728684, + "loss": 0.4915, + "step": 157 + }, + { + "epoch": 0.3, + "learning_rate": 0.0003984380377810617, + "loss": 0.6389, + "step": 158 + }, + { + "epoch": 0.31, + "learning_rate": 0.00039838378598194325, + "loss": 0.6067, + "step": 159 + }, + { + "epoch": 0.31, + "learning_rate": 0.00039832861185216045, + "loss": 0.6136, + "step": 160 + }, + { + "epoch": 0.31, + "learning_rate": 0.0003982725156482301, + "loss": 0.5597, + "step": 161 + }, + { + "epoch": 0.31, + "learning_rate": 0.000398215497630956, + "loss": 0.5957, + "step": 162 + }, + { + "epoch": 0.31, + "learning_rate": 0.0003981575580654278, + "loss": 0.5853, + "step": 163 + }, + { + "epoch": 0.32, + "learning_rate": 0.0003980986972210194, + "loss": 0.5462, + "step": 164 + }, + { + "epoch": 0.32, + "learning_rate": 0.0003980389153713881, + "loss": 0.5302, + "step": 165 + }, + { + "epoch": 0.32, + "learning_rate": 0.00039797821279447307, + "loss": 0.5395, + "step": 166 + }, + { + "epoch": 0.32, + "learning_rate": 0.00039791658977249425, + "loss": 0.7004, + "step": 167 + }, + { + "epoch": 0.32, + "learning_rate": 0.00039785404659195084, + "loss": 0.5622, + "step": 168 + }, + { + "epoch": 0.33, + "learning_rate": 0.00039779058354362013, + "loss": 0.5759, + "step": 169 + }, + { + "epoch": 0.33, + "learning_rate": 0.000397726200922556, + "loss": 0.6184, + "step": 170 + }, + { + "epoch": 0.33, + "learning_rate": 0.0003976608990280877, + "loss": 0.5488, + "step": 171 + }, + { + "epoch": 0.33, + "learning_rate": 0.0003975946781638183, + "loss": 0.6162, + "step": 172 + }, + { + "epoch": 0.33, + "learning_rate": 0.0003975275386376236, + "loss": 0.558, + "step": 173 + }, + { + "epoch": 0.34, + "learning_rate": 0.0003974594807616502, + "loss": 0.519, + "step": 174 + }, + { + "epoch": 0.34, + "learning_rate": 0.0003973905048523144, + "loss": 0.6195, + "step": 175 + }, + { + "epoch": 0.34, + "learning_rate": 0.00039732061123030064, + "loss": 0.5991, + "step": 176 + }, + { + "epoch": 0.34, + "learning_rate": 0.0003972498002205601, + "loss": 0.5428, + "step": 177 + }, + { + "epoch": 0.34, + "learning_rate": 0.00039717807215230896, + "loss": 0.5323, + "step": 178 + }, + { + "epoch": 0.34, + "learning_rate": 0.00039710542735902705, + "loss": 0.5307, + "step": 179 + }, + { + "epoch": 0.35, + "learning_rate": 0.0003970318661784564, + "loss": 0.5783, + "step": 180 + }, + { + "epoch": 0.35, + "learning_rate": 0.0003969573889525993, + "loss": 0.5924, + "step": 181 + }, + { + "epoch": 0.35, + "learning_rate": 0.00039688199602771714, + "loss": 0.5902, + "step": 182 + }, + { + "epoch": 0.35, + "learning_rate": 0.00039680568775432855, + "loss": 0.6291, + "step": 183 + }, + { + "epoch": 0.35, + "learning_rate": 0.0003967284644872077, + "loss": 0.5942, + "step": 184 + }, + { + "epoch": 0.36, + "learning_rate": 0.0003966503265853829, + "loss": 0.4878, + "step": 185 + }, + { + "epoch": 0.36, + "learning_rate": 0.0003965712744121347, + "loss": 0.6487, + "step": 186 + }, + { + "epoch": 0.36, + "learning_rate": 0.0003964913083349945, + "loss": 0.6111, + "step": 187 + }, + { + "epoch": 0.36, + "learning_rate": 0.00039641042872574233, + "loss": 0.6072, + "step": 188 + }, + { + "epoch": 0.36, + "learning_rate": 0.00039632863596040575, + "loss": 0.716, + "step": 189 + }, + { + "epoch": 0.37, + "learning_rate": 0.00039624593041925763, + "loss": 0.6178, + "step": 190 + }, + { + "epoch": 0.37, + "learning_rate": 0.0003961623124868145, + "loss": 0.6323, + "step": 191 + }, + { + "epoch": 0.37, + "learning_rate": 0.00039607778255183485, + "loss": 0.5821, + "step": 192 + }, + { + "epoch": 0.37, + "learning_rate": 0.0003959923410073174, + "loss": 0.6738, + "step": 193 + }, + { + "epoch": 0.37, + "learning_rate": 0.0003959059882504989, + "loss": 0.6203, + "step": 194 + }, + { + "epoch": 0.38, + "learning_rate": 0.00039581872468285277, + "loss": 0.632, + "step": 195 + }, + { + "epoch": 0.38, + "learning_rate": 0.0003957305507100868, + "loss": 0.5857, + "step": 196 + }, + { + "epoch": 0.38, + "learning_rate": 0.00039564146674214164, + "loss": 0.6311, + "step": 197 + }, + { + "epoch": 0.38, + "learning_rate": 0.0003955514731931885, + "loss": 0.5889, + "step": 198 + }, + { + "epoch": 0.38, + "learning_rate": 0.00039546057048162763, + "loss": 0.5201, + "step": 199 + }, + { + "epoch": 0.39, + "learning_rate": 0.00039536875903008607, + "loss": 0.5581, + "step": 200 + }, + { + "epoch": 0.39, + "learning_rate": 0.00039527603926541586, + "loss": 0.5104, + "step": 201 + }, + { + "epoch": 0.39, + "learning_rate": 0.00039518241161869193, + "loss": 0.5978, + "step": 202 + }, + { + "epoch": 0.39, + "learning_rate": 0.00039508787652521013, + "loss": 0.6244, + "step": 203 + }, + { + "epoch": 0.39, + "learning_rate": 0.00039499243442448536, + "loss": 0.589, + "step": 204 + }, + { + "epoch": 0.39, + "learning_rate": 0.0003948960857602493, + "loss": 0.575, + "step": 205 + }, + { + "epoch": 0.4, + "learning_rate": 0.0003947988309804485, + "loss": 0.5494, + "step": 206 + }, + { + "epoch": 0.4, + "learning_rate": 0.0003947006705372422, + "loss": 0.4895, + "step": 207 + }, + { + "epoch": 0.4, + "learning_rate": 0.00039460160488700036, + "loss": 0.5479, + "step": 208 + }, + { + "epoch": 0.4, + "learning_rate": 0.00039450163449030124, + "loss": 0.5893, + "step": 209 + }, + { + "epoch": 0.4, + "learning_rate": 0.0003944007598119297, + "loss": 0.5451, + "step": 210 + }, + { + "epoch": 0.41, + "learning_rate": 0.0003942989813208747, + "loss": 0.5582, + "step": 211 + }, + { + "epoch": 0.41, + "learning_rate": 0.0003941962994903273, + "loss": 0.5121, + "step": 212 + }, + { + "epoch": 0.41, + "learning_rate": 0.00039409271479767826, + "loss": 0.6324, + "step": 213 + }, + { + "epoch": 0.41, + "learning_rate": 0.000393988227724516, + "loss": 0.6118, + "step": 214 + }, + { + "epoch": 0.41, + "learning_rate": 0.0003938828387566244, + "loss": 0.6303, + "step": 215 + }, + { + "epoch": 0.42, + "learning_rate": 0.0003937765483839804, + "loss": 0.7705, + "step": 216 + }, + { + "epoch": 0.42, + "learning_rate": 0.0003936693571007517, + "loss": 0.6224, + "step": 217 + }, + { + "epoch": 0.42, + "learning_rate": 0.0003935612654052946, + "loss": 0.5664, + "step": 218 + }, + { + "epoch": 0.42, + "learning_rate": 0.00039345227380015163, + "loss": 0.66, + "step": 219 + }, + { + "epoch": 0.42, + "learning_rate": 0.00039334238279204906, + "loss": 0.5582, + "step": 220 + }, + { + "epoch": 0.43, + "learning_rate": 0.00039323159289189505, + "loss": 0.6087, + "step": 221 + }, + { + "epoch": 0.43, + "learning_rate": 0.0003931199046147764, + "loss": 0.5566, + "step": 222 + }, + { + "epoch": 0.43, + "learning_rate": 0.00039300731847995716, + "loss": 0.5775, + "step": 223 + }, + { + "epoch": 0.43, + "learning_rate": 0.00039289383501087534, + "loss": 0.5081, + "step": 224 + }, + { + "epoch": 0.43, + "learning_rate": 0.00039277945473514104, + "loss": 0.5218, + "step": 225 + }, + { + "epoch": 0.44, + "learning_rate": 0.0003926641781845338, + "loss": 0.6655, + "step": 226 + }, + { + "epoch": 0.44, + "learning_rate": 0.0003925480058950002, + "loss": 0.5735, + "step": 227 + }, + { + "epoch": 0.44, + "learning_rate": 0.00039243093840665114, + "loss": 0.6609, + "step": 228 + }, + { + "epoch": 0.44, + "learning_rate": 0.0003923129762637596, + "loss": 0.7323, + "step": 229 + }, + { + "epoch": 0.44, + "learning_rate": 0.000392194120014758, + "loss": 0.5703, + "step": 230 + }, + { + "epoch": 0.44, + "learning_rate": 0.00039207437021223583, + "loss": 0.6545, + "step": 231 + }, + { + "epoch": 0.45, + "learning_rate": 0.0003919537274129366, + "loss": 0.521, + "step": 232 + }, + { + "epoch": 0.45, + "learning_rate": 0.00039183219217775564, + "loss": 0.5257, + "step": 233 + }, + { + "epoch": 0.45, + "learning_rate": 0.0003917097650717377, + "loss": 0.5487, + "step": 234 + }, + { + "epoch": 0.45, + "learning_rate": 0.00039158644666407365, + "loss": 0.4861, + "step": 235 + }, + { + "epoch": 0.45, + "learning_rate": 0.00039146223752809845, + "loss": 0.4928, + "step": 236 + }, + { + "epoch": 0.46, + "learning_rate": 0.0003913371382412883, + "loss": 0.5253, + "step": 237 + }, + { + "epoch": 0.46, + "learning_rate": 0.00039121114938525756, + "loss": 0.6155, + "step": 238 + }, + { + "epoch": 0.46, + "learning_rate": 0.00039108427154575684, + "loss": 0.55, + "step": 239 + }, + { + "epoch": 0.46, + "learning_rate": 0.00039095650531266967, + "loss": 0.6617, + "step": 240 + }, + { + "epoch": 0.46, + "learning_rate": 0.00039082785128000976, + "loss": 0.5198, + "step": 241 + }, + { + "epoch": 0.47, + "learning_rate": 0.00039069831004591866, + "loss": 0.5302, + "step": 242 + }, + { + "epoch": 0.47, + "learning_rate": 0.0003905678822126625, + "loss": 0.5347, + "step": 243 + }, + { + "epoch": 0.47, + "learning_rate": 0.00039043656838662946, + "loss": 0.531, + "step": 244 + }, + { + "epoch": 0.47, + "learning_rate": 0.00039030436917832697, + "loss": 0.4884, + "step": 245 + }, + { + "epoch": 0.47, + "learning_rate": 0.00039017128520237883, + "loss": 0.6027, + "step": 246 + }, + { + "epoch": 0.48, + "learning_rate": 0.0003900373170775222, + "loss": 0.5537, + "step": 247 + }, + { + "epoch": 0.48, + "learning_rate": 0.00038990246542660494, + "loss": 0.5753, + "step": 248 + }, + { + "epoch": 0.48, + "learning_rate": 0.00038976673087658256, + "loss": 0.5059, + "step": 249 + }, + { + "epoch": 0.48, + "learning_rate": 0.00038963011405851537, + "loss": 0.5118, + "step": 250 + }, + { + "epoch": 0.48, + "learning_rate": 0.00038949261560756565, + "loss": 0.5645, + "step": 251 + }, + { + "epoch": 0.49, + "learning_rate": 0.0003893542361629944, + "loss": 0.5623, + "step": 252 + }, + { + "epoch": 0.49, + "learning_rate": 0.00038921497636815866, + "loss": 0.5216, + "step": 253 + }, + { + "epoch": 0.49, + "learning_rate": 0.0003890748368705085, + "loss": 0.4501, + "step": 254 + }, + { + "epoch": 0.49, + "learning_rate": 0.0003889338183215838, + "loss": 0.48, + "step": 255 + }, + { + "epoch": 0.49, + "learning_rate": 0.00038879192137701135, + "loss": 0.5218, + "step": 256 + }, + { + "epoch": 0.49, + "learning_rate": 0.0003886491466965018, + "loss": 0.5858, + "step": 257 + }, + { + "epoch": 0.5, + "learning_rate": 0.00038850549494384685, + "loss": 0.6124, + "step": 258 + }, + { + "epoch": 0.5, + "learning_rate": 0.00038836096678691536, + "loss": 0.4645, + "step": 259 + }, + { + "epoch": 0.5, + "learning_rate": 0.00038821556289765136, + "loss": 0.474, + "step": 260 + }, + { + "epoch": 0.5, + "learning_rate": 0.00038806928395207003, + "loss": 0.4364, + "step": 261 + }, + { + "epoch": 0.5, + "learning_rate": 0.00038792213063025484, + "loss": 0.5821, + "step": 262 + }, + { + "epoch": 0.51, + "learning_rate": 0.0003877741036163547, + "loss": 0.5393, + "step": 263 + }, + { + "epoch": 0.51, + "learning_rate": 0.0003876252035985804, + "loss": 0.5373, + "step": 264 + }, + { + "epoch": 0.51, + "learning_rate": 0.0003874754312692013, + "loss": 0.6021, + "step": 265 + }, + { + "epoch": 0.51, + "learning_rate": 0.0003873247873245426, + "loss": 0.4549, + "step": 266 + }, + { + "epoch": 0.51, + "learning_rate": 0.0003871732724649817, + "loss": 0.5994, + "step": 267 + }, + { + "epoch": 0.52, + "learning_rate": 0.0003870208873949453, + "loss": 0.4764, + "step": 268 + }, + { + "epoch": 0.52, + "learning_rate": 0.00038686763282290556, + "loss": 0.4311, + "step": 269 + }, + { + "epoch": 0.52, + "learning_rate": 0.0003867135094613774, + "loss": 0.5462, + "step": 270 + }, + { + "epoch": 0.52, + "learning_rate": 0.0003865585180269148, + "loss": 0.5006, + "step": 271 + }, + { + "epoch": 0.52, + "learning_rate": 0.0003864026592401076, + "loss": 0.5347, + "step": 272 + }, + { + "epoch": 0.53, + "learning_rate": 0.00038624593382557835, + "loss": 0.5242, + "step": 273 + }, + { + "epoch": 0.53, + "learning_rate": 0.00038608834251197856, + "loss": 0.5005, + "step": 274 + }, + { + "epoch": 0.53, + "learning_rate": 0.00038592988603198554, + "loss": 0.5436, + "step": 275 + }, + { + "epoch": 0.53, + "learning_rate": 0.000385770565122299, + "loss": 0.4658, + "step": 276 + }, + { + "epoch": 0.53, + "learning_rate": 0.0003856103805236375, + "loss": 0.5273, + "step": 277 + }, + { + "epoch": 0.54, + "learning_rate": 0.00038544933298073516, + "loss": 0.436, + "step": 278 + }, + { + "epoch": 0.54, + "learning_rate": 0.00038528742324233804, + "loss": 0.4785, + "step": 279 + }, + { + "epoch": 0.54, + "learning_rate": 0.00038512465206120086, + "loss": 0.5366, + "step": 280 + }, + { + "epoch": 0.54, + "learning_rate": 0.00038496102019408324, + "loss": 0.4448, + "step": 281 + }, + { + "epoch": 0.54, + "learning_rate": 0.00038479652840174637, + "loss": 0.5132, + "step": 282 + }, + { + "epoch": 0.54, + "learning_rate": 0.00038463117744894955, + "loss": 0.7918, + "step": 283 + }, + { + "epoch": 0.55, + "learning_rate": 0.00038446496810444627, + "loss": 0.5309, + "step": 284 + }, + { + "epoch": 0.55, + "learning_rate": 0.00038429790114098114, + "loss": 0.5316, + "step": 285 + }, + { + "epoch": 0.55, + "learning_rate": 0.00038412997733528576, + "loss": 0.4611, + "step": 286 + }, + { + "epoch": 0.55, + "learning_rate": 0.00038396119746807563, + "loss": 0.4609, + "step": 287 + }, + { + "epoch": 0.55, + "learning_rate": 0.00038379156232404613, + "loss": 0.5821, + "step": 288 + }, + { + "epoch": 0.56, + "learning_rate": 0.0003836210726918691, + "loss": 0.5883, + "step": 289 + }, + { + "epoch": 0.56, + "learning_rate": 0.0003834497293641889, + "loss": 0.5012, + "step": 290 + }, + { + "epoch": 0.56, + "learning_rate": 0.00038327753313761913, + "loss": 0.4457, + "step": 291 + }, + { + "epoch": 0.56, + "learning_rate": 0.00038310448481273867, + "loss": 0.4851, + "step": 292 + }, + { + "epoch": 0.56, + "learning_rate": 0.00038293058519408787, + "loss": 0.5622, + "step": 293 + }, + { + "epoch": 0.57, + "learning_rate": 0.00038275583509016507, + "loss": 0.5703, + "step": 294 + }, + { + "epoch": 0.57, + "learning_rate": 0.00038258023531342265, + "loss": 0.5718, + "step": 295 + }, + { + "epoch": 0.57, + "learning_rate": 0.0003824037866802632, + "loss": 0.5183, + "step": 296 + }, + { + "epoch": 0.57, + "learning_rate": 0.00038222649001103614, + "loss": 0.5085, + "step": 297 + }, + { + "epoch": 0.57, + "learning_rate": 0.00038204834613003323, + "loss": 0.5388, + "step": 298 + }, + { + "epoch": 0.58, + "learning_rate": 0.00038186935586548537, + "loss": 0.5425, + "step": 299 + }, + { + "epoch": 0.58, + "learning_rate": 0.0003816895200495584, + "loss": 0.447, + "step": 300 + }, + { + "epoch": 0.58, + "learning_rate": 0.0003815088395183493, + "loss": 0.5541, + "step": 301 + }, + { + "epoch": 0.58, + "learning_rate": 0.00038132731511188227, + "loss": 0.5518, + "step": 302 + }, + { + "epoch": 0.58, + "learning_rate": 0.000381144947674105, + "loss": 0.5074, + "step": 303 + }, + { + "epoch": 0.59, + "learning_rate": 0.0003809617380528847, + "loss": 0.5134, + "step": 304 + }, + { + "epoch": 0.59, + "learning_rate": 0.0003807776871000037, + "loss": 0.4599, + "step": 305 + }, + { + "epoch": 0.59, + "learning_rate": 0.0003805927956711562, + "loss": 0.5838, + "step": 306 + }, + { + "epoch": 0.59, + "learning_rate": 0.00038040706462594395, + "loss": 0.5216, + "step": 307 + }, + { + "epoch": 0.59, + "learning_rate": 0.00038022049482787216, + "loss": 0.5323, + "step": 308 + }, + { + "epoch": 0.6, + "learning_rate": 0.0003800330871443456, + "loss": 0.5681, + "step": 309 + }, + { + "epoch": 0.6, + "learning_rate": 0.00037984484244666446, + "loss": 0.4172, + "step": 310 + }, + { + "epoch": 0.6, + "learning_rate": 0.0003796557616100207, + "loss": 0.4958, + "step": 311 + }, + { + "epoch": 0.6, + "learning_rate": 0.0003794658455134934, + "loss": 0.662, + "step": 312 + }, + { + "epoch": 0.6, + "learning_rate": 0.0003792750950400451, + "loss": 0.5832, + "step": 313 + }, + { + "epoch": 0.6, + "learning_rate": 0.0003790835110765174, + "loss": 0.4271, + "step": 314 + }, + { + "epoch": 0.61, + "learning_rate": 0.0003788910945136271, + "loss": 0.4842, + "step": 315 + }, + { + "epoch": 0.61, + "learning_rate": 0.00037869784624596186, + "loss": 0.4656, + "step": 316 + }, + { + "epoch": 0.61, + "learning_rate": 0.00037850376717197626, + "loss": 0.4981, + "step": 317 + }, + { + "epoch": 0.61, + "learning_rate": 0.00037830885819398733, + "loss": 0.5162, + "step": 318 + }, + { + "epoch": 0.61, + "learning_rate": 0.00037811312021817067, + "loss": 0.652, + "step": 319 + }, + { + "epoch": 0.62, + "learning_rate": 0.0003779165541545558, + "loss": 0.5104, + "step": 320 + }, + { + "epoch": 0.62, + "learning_rate": 0.0003777191609170225, + "loss": 0.4971, + "step": 321 + }, + { + "epoch": 0.62, + "learning_rate": 0.0003775209414232962, + "loss": 0.4871, + "step": 322 + }, + { + "epoch": 0.62, + "learning_rate": 0.0003773218965949436, + "loss": 0.5226, + "step": 323 + }, + { + "epoch": 0.62, + "learning_rate": 0.00037712202735736884, + "loss": 0.4823, + "step": 324 + }, + { + "epoch": 0.63, + "learning_rate": 0.0003769213346398087, + "loss": 0.497, + "step": 325 + }, + { + "epoch": 0.63, + "learning_rate": 0.0003767198193753286, + "loss": 0.5976, + "step": 326 + }, + { + "epoch": 0.63, + "learning_rate": 0.0003765174825008181, + "loss": 0.4532, + "step": 327 + }, + { + "epoch": 0.63, + "learning_rate": 0.0003763143249569868, + "loss": 0.5236, + "step": 328 + }, + { + "epoch": 0.63, + "learning_rate": 0.00037611034768835947, + "loss": 0.6513, + "step": 329 + }, + { + "epoch": 0.64, + "learning_rate": 0.00037590555164327224, + "loss": 0.5686, + "step": 330 + }, + { + "epoch": 0.64, + "learning_rate": 0.00037569993777386774, + "loss": 0.456, + "step": 331 + }, + { + "epoch": 0.64, + "learning_rate": 0.0003754935070360909, + "loss": 0.5181, + "step": 332 + }, + { + "epoch": 0.64, + "learning_rate": 0.0003752862603896846, + "loss": 0.4765, + "step": 333 + }, + { + "epoch": 0.64, + "learning_rate": 0.00037507819879818477, + "loss": 0.5363, + "step": 334 + }, + { + "epoch": 0.65, + "learning_rate": 0.00037486932322891646, + "loss": 0.4584, + "step": 335 + }, + { + "epoch": 0.65, + "learning_rate": 0.00037465963465298886, + "loss": 0.5428, + "step": 336 + }, + { + "epoch": 0.65, + "learning_rate": 0.0003744491340452913, + "loss": 0.3927, + "step": 337 + }, + { + "epoch": 0.65, + "learning_rate": 0.0003742378223844882, + "loss": 0.5478, + "step": 338 + }, + { + "epoch": 0.65, + "learning_rate": 0.0003740257006530147, + "loss": 0.469, + "step": 339 + }, + { + "epoch": 0.65, + "learning_rate": 0.00037381276983707246, + "loss": 0.5169, + "step": 340 + }, + { + "epoch": 0.66, + "learning_rate": 0.00037359903092662434, + "loss": 0.4797, + "step": 341 + }, + { + "epoch": 0.66, + "learning_rate": 0.00037338448491539054, + "loss": 0.5315, + "step": 342 + }, + { + "epoch": 0.66, + "learning_rate": 0.00037316913280084353, + "loss": 0.4422, + "step": 343 + }, + { + "epoch": 0.66, + "learning_rate": 0.0003729529755842035, + "loss": 0.4426, + "step": 344 + }, + { + "epoch": 0.66, + "learning_rate": 0.0003727360142704337, + "loss": 0.4718, + "step": 345 + }, + { + "epoch": 0.67, + "learning_rate": 0.0003725182498682361, + "loss": 0.5585, + "step": 346 + }, + { + "epoch": 0.67, + "learning_rate": 0.0003722996833900459, + "loss": 0.4775, + "step": 347 + }, + { + "epoch": 0.67, + "learning_rate": 0.0003720803158520279, + "loss": 0.6014, + "step": 348 + }, + { + "epoch": 0.67, + "learning_rate": 0.00037186014827407076, + "loss": 0.5117, + "step": 349 + }, + { + "epoch": 0.67, + "learning_rate": 0.0003716391816797829, + "loss": 0.5404, + "step": 350 + }, + { + "epoch": 0.68, + "learning_rate": 0.0003714174170964876, + "loss": 0.527, + "step": 351 + }, + { + "epoch": 0.68, + "learning_rate": 0.00037119485555521796, + "loss": 0.4555, + "step": 352 + }, + { + "epoch": 0.68, + "learning_rate": 0.00037097149809071255, + "loss": 0.5372, + "step": 353 + }, + { + "epoch": 0.68, + "learning_rate": 0.00037074734574141016, + "loss": 0.5377, + "step": 354 + }, + { + "epoch": 0.68, + "learning_rate": 0.0003705223995494454, + "loss": 0.4925, + "step": 355 + }, + { + "epoch": 0.69, + "learning_rate": 0.00037029666056064345, + "loss": 0.482, + "step": 356 + }, + { + "epoch": 0.69, + "learning_rate": 0.00037007012982451546, + "loss": 0.5235, + "step": 357 + }, + { + "epoch": 0.69, + "learning_rate": 0.00036984280839425356, + "loss": 0.4957, + "step": 358 + }, + { + "epoch": 0.69, + "learning_rate": 0.000369614697326726, + "loss": 0.5379, + "step": 359 + }, + { + "epoch": 0.69, + "learning_rate": 0.0003693857976824721, + "loss": 0.4653, + "step": 360 + }, + { + "epoch": 0.7, + "learning_rate": 0.00036915611052569785, + "loss": 0.469, + "step": 361 + }, + { + "epoch": 0.7, + "learning_rate": 0.0003689256369242702, + "loss": 0.5618, + "step": 362 + }, + { + "epoch": 0.7, + "learning_rate": 0.0003686943779497124, + "loss": 0.4459, + "step": 363 + }, + { + "epoch": 0.7, + "learning_rate": 0.0003684623346771995, + "loss": 0.5606, + "step": 364 + }, + { + "epoch": 0.7, + "learning_rate": 0.0003682295081855524, + "loss": 0.4368, + "step": 365 + }, + { + "epoch": 0.7, + "learning_rate": 0.00036799589955723375, + "loss": 0.4168, + "step": 366 + }, + { + "epoch": 0.71, + "learning_rate": 0.00036776150987834243, + "loss": 0.4664, + "step": 367 + }, + { + "epoch": 0.71, + "learning_rate": 0.00036752634023860846, + "loss": 0.4737, + "step": 368 + }, + { + "epoch": 0.71, + "learning_rate": 0.0003672903917313883, + "loss": 0.4247, + "step": 369 + }, + { + "epoch": 0.71, + "learning_rate": 0.00036705366545365935, + "loss": 0.5677, + "step": 370 + }, + { + "epoch": 0.71, + "learning_rate": 0.00036681616250601505, + "loss": 0.5441, + "step": 371 + }, + { + "epoch": 0.72, + "learning_rate": 0.0003665778839926599, + "loss": 0.6247, + "step": 372 + }, + { + "epoch": 0.72, + "learning_rate": 0.00036633883102140405, + "loss": 0.5217, + "step": 373 + }, + { + "epoch": 0.72, + "learning_rate": 0.0003660990047036584, + "loss": 0.4651, + "step": 374 + }, + { + "epoch": 0.72, + "learning_rate": 0.0003658584061544291, + "loss": 0.4648, + "step": 375 + }, + { + "epoch": 0.72, + "learning_rate": 0.0003656170364923128, + "loss": 0.6048, + "step": 376 + }, + { + "epoch": 0.73, + "learning_rate": 0.00036537489683949114, + "loss": 0.4515, + "step": 377 + }, + { + "epoch": 0.73, + "learning_rate": 0.0003651319883217255, + "loss": 0.5096, + "step": 378 + }, + { + "epoch": 0.73, + "learning_rate": 0.00036488831206835207, + "loss": 0.4231, + "step": 379 + }, + { + "epoch": 0.73, + "learning_rate": 0.00036464386921227637, + "loss": 0.4903, + "step": 380 + }, + { + "epoch": 0.73, + "learning_rate": 0.00036439866088996796, + "loss": 0.5131, + "step": 381 + }, + { + "epoch": 0.74, + "learning_rate": 0.0003641526882414553, + "loss": 0.5986, + "step": 382 + }, + { + "epoch": 0.74, + "learning_rate": 0.0003639059524103203, + "loss": 0.6, + "step": 383 + }, + { + "epoch": 0.74, + "learning_rate": 0.0003636584545436931, + "loss": 0.5216, + "step": 384 + }, + { + "epoch": 0.74, + "learning_rate": 0.0003634101957922468, + "loss": 0.5144, + "step": 385 + }, + { + "epoch": 0.74, + "learning_rate": 0.00036316117731019184, + "loss": 0.4963, + "step": 386 + }, + { + "epoch": 0.75, + "learning_rate": 0.0003629114002552711, + "loss": 0.5657, + "step": 387 + }, + { + "epoch": 0.75, + "learning_rate": 0.00036266086578875384, + "loss": 0.5028, + "step": 388 + }, + { + "epoch": 0.75, + "learning_rate": 0.0003624095750754311, + "loss": 0.573, + "step": 389 + }, + { + "epoch": 0.75, + "learning_rate": 0.00036215752928360967, + "loss": 0.5199, + "step": 390 + }, + { + "epoch": 0.75, + "learning_rate": 0.0003619047295851068, + "loss": 0.656, + "step": 391 + }, + { + "epoch": 0.75, + "learning_rate": 0.00036165117715524506, + "loss": 0.5129, + "step": 392 + }, + { + "epoch": 0.76, + "learning_rate": 0.00036139687317284647, + "loss": 0.3945, + "step": 393 + }, + { + "epoch": 0.76, + "learning_rate": 0.0003611418188202271, + "loss": 0.5318, + "step": 394 + }, + { + "epoch": 0.76, + "learning_rate": 0.00036088601528319196, + "loss": 0.5344, + "step": 395 + }, + { + "epoch": 0.76, + "learning_rate": 0.00036062946375102885, + "loss": 0.5407, + "step": 396 + }, + { + "epoch": 0.76, + "learning_rate": 0.0003603721654165034, + "loss": 0.5364, + "step": 397 + }, + { + "epoch": 0.77, + "learning_rate": 0.00036011412147585306, + "loss": 0.5407, + "step": 398 + }, + { + "epoch": 0.77, + "learning_rate": 0.0003598553331287821, + "loss": 0.5999, + "step": 399 + }, + { + "epoch": 0.77, + "learning_rate": 0.0003595958015784555, + "loss": 0.624, + "step": 400 + }, + { + "epoch": 0.77, + "learning_rate": 0.00035933552803149354, + "loss": 0.5351, + "step": 401 + }, + { + "epoch": 0.77, + "learning_rate": 0.0003590745136979662, + "loss": 0.5196, + "step": 402 + }, + { + "epoch": 0.78, + "learning_rate": 0.00035881275979138765, + "loss": 0.5447, + "step": 403 + }, + { + "epoch": 0.78, + "learning_rate": 0.0003585502675287104, + "loss": 0.4908, + "step": 404 + }, + { + "epoch": 0.78, + "learning_rate": 0.00035828703813031986, + "loss": 0.5172, + "step": 405 + }, + { + "epoch": 0.78, + "learning_rate": 0.00035802307282002834, + "loss": 0.5923, + "step": 406 + }, + { + "epoch": 0.78, + "learning_rate": 0.0003577583728250699, + "loss": 0.568, + "step": 407 + }, + { + "epoch": 0.79, + "learning_rate": 0.00035749293937609395, + "loss": 0.4618, + "step": 408 + }, + { + "epoch": 0.79, + "learning_rate": 0.0003572267737071601, + "loss": 0.5351, + "step": 409 + }, + { + "epoch": 0.79, + "learning_rate": 0.0003569598770557322, + "loss": 0.5285, + "step": 410 + }, + { + "epoch": 0.79, + "learning_rate": 0.00035669225066267256, + "loss": 0.4571, + "step": 411 + }, + { + "epoch": 0.79, + "learning_rate": 0.00035642389577223625, + "loss": 0.4214, + "step": 412 + }, + { + "epoch": 0.8, + "learning_rate": 0.0003561548136320653, + "loss": 0.5393, + "step": 413 + }, + { + "epoch": 0.8, + "learning_rate": 0.0003558850054931828, + "loss": 0.549, + "step": 414 + }, + { + "epoch": 0.8, + "learning_rate": 0.00035561447260998714, + "loss": 0.4824, + "step": 415 + }, + { + "epoch": 0.8, + "learning_rate": 0.00035534321624024656, + "loss": 0.6244, + "step": 416 + }, + { + "epoch": 0.8, + "learning_rate": 0.00035507123764509245, + "loss": 0.5436, + "step": 417 + }, + { + "epoch": 0.8, + "learning_rate": 0.0003547985380890144, + "loss": 0.5198, + "step": 418 + }, + { + "epoch": 0.81, + "learning_rate": 0.00035452511883985366, + "loss": 0.5979, + "step": 419 + }, + { + "epoch": 0.81, + "learning_rate": 0.00035425098116879754, + "loss": 0.4158, + "step": 420 + }, + { + "epoch": 0.81, + "learning_rate": 0.00035397612635037356, + "loss": 0.5125, + "step": 421 + }, + { + "epoch": 0.81, + "learning_rate": 0.00035370055566244334, + "loss": 0.4699, + "step": 422 + }, + { + "epoch": 0.81, + "learning_rate": 0.0003534242703861966, + "loss": 0.5553, + "step": 423 + }, + { + "epoch": 0.82, + "learning_rate": 0.00035314727180614573, + "loss": 0.5969, + "step": 424 + }, + { + "epoch": 0.82, + "learning_rate": 0.00035286956121011897, + "loss": 0.456, + "step": 425 + }, + { + "epoch": 0.82, + "learning_rate": 0.0003525911398892552, + "loss": 0.5195, + "step": 426 + }, + { + "epoch": 0.82, + "learning_rate": 0.0003523120091379975, + "loss": 0.5187, + "step": 427 + }, + { + "epoch": 0.82, + "learning_rate": 0.00035203217025408726, + "loss": 0.5443, + "step": 428 + }, + { + "epoch": 0.83, + "learning_rate": 0.0003517516245385582, + "loss": 0.4476, + "step": 429 + }, + { + "epoch": 0.83, + "learning_rate": 0.0003514703732957301, + "loss": 0.5757, + "step": 430 + }, + { + "epoch": 0.83, + "learning_rate": 0.00035118841783320304, + "loss": 0.5129, + "step": 431 + }, + { + "epoch": 0.83, + "learning_rate": 0.00035090575946185114, + "loss": 0.6354, + "step": 432 + }, + { + "epoch": 0.83, + "learning_rate": 0.00035062239949581645, + "loss": 0.4065, + "step": 433 + }, + { + "epoch": 0.84, + "learning_rate": 0.000350338339252503, + "loss": 0.5472, + "step": 434 + }, + { + "epoch": 0.84, + "learning_rate": 0.00035005358005257045, + "loss": 0.5424, + "step": 435 + }, + { + "epoch": 0.84, + "learning_rate": 0.00034976812321992816, + "loss": 0.6127, + "step": 436 + }, + { + "epoch": 0.84, + "learning_rate": 0.00034948197008172877, + "loss": 0.63, + "step": 437 + }, + { + "epoch": 0.84, + "learning_rate": 0.0003491951219683625, + "loss": 0.413, + "step": 438 + }, + { + "epoch": 0.85, + "learning_rate": 0.00034890758021345034, + "loss": 0.5435, + "step": 439 + }, + { + "epoch": 0.85, + "learning_rate": 0.00034861934615383844, + "loss": 0.5433, + "step": 440 + }, + { + "epoch": 0.85, + "learning_rate": 0.00034833042112959153, + "loss": 0.4763, + "step": 441 + }, + { + "epoch": 0.85, + "learning_rate": 0.00034804080648398667, + "loss": 0.5727, + "step": 442 + }, + { + "epoch": 0.85, + "learning_rate": 0.00034775050356350727, + "loss": 0.5392, + "step": 443 + }, + { + "epoch": 0.85, + "learning_rate": 0.00034745951371783666, + "loss": 0.4981, + "step": 444 + }, + { + "epoch": 0.86, + "learning_rate": 0.0003471678382998518, + "loss": 0.5516, + "step": 445 + }, + { + "epoch": 0.86, + "learning_rate": 0.00034687547866561703, + "loss": 0.4965, + "step": 446 + }, + { + "epoch": 0.86, + "learning_rate": 0.0003465824361743779, + "loss": 0.4982, + "step": 447 + }, + { + "epoch": 0.86, + "learning_rate": 0.0003462887121885544, + "loss": 0.5619, + "step": 448 + }, + { + "epoch": 0.86, + "learning_rate": 0.0003459943080737353, + "loss": 0.5273, + "step": 449 + }, + { + "epoch": 0.87, + "learning_rate": 0.00034569922519867133, + "loss": 0.517, + "step": 450 + }, + { + "epoch": 0.87, + "learning_rate": 0.00034540346493526876, + "loss": 0.4874, + "step": 451 + }, + { + "epoch": 0.87, + "learning_rate": 0.0003451070286585833, + "loss": 0.5966, + "step": 452 + }, + { + "epoch": 0.87, + "learning_rate": 0.0003448099177468137, + "loss": 0.4487, + "step": 453 + }, + { + "epoch": 0.87, + "learning_rate": 0.0003445121335812951, + "loss": 0.5091, + "step": 454 + }, + { + "epoch": 0.88, + "learning_rate": 0.0003442136775464929, + "loss": 0.407, + "step": 455 + }, + { + "epoch": 0.88, + "learning_rate": 0.0003439145510299958, + "loss": 0.6327, + "step": 456 + }, + { + "epoch": 0.88, + "learning_rate": 0.00034361475542251025, + "loss": 0.4217, + "step": 457 + }, + { + "epoch": 0.88, + "learning_rate": 0.0003433142921178531, + "loss": 0.6102, + "step": 458 + }, + { + "epoch": 0.88, + "learning_rate": 0.0003430131625129456, + "loss": 0.5556, + "step": 459 + }, + { + "epoch": 0.89, + "learning_rate": 0.00034271136800780673, + "loss": 0.4986, + "step": 460 + }, + { + "epoch": 0.89, + "learning_rate": 0.0003424089100055467, + "loss": 0.5406, + "step": 461 + }, + { + "epoch": 0.89, + "learning_rate": 0.00034210578991236056, + "loss": 0.5881, + "step": 462 + }, + { + "epoch": 0.89, + "learning_rate": 0.00034180200913752157, + "loss": 0.4869, + "step": 463 + }, + { + "epoch": 0.89, + "learning_rate": 0.00034149756909337454, + "loss": 0.5626, + "step": 464 + }, + { + "epoch": 0.9, + "learning_rate": 0.0003411924711953295, + "loss": 0.564, + "step": 465 + }, + { + "epoch": 0.9, + "learning_rate": 0.00034088671686185486, + "loss": 0.6272, + "step": 466 + }, + { + "epoch": 0.9, + "learning_rate": 0.0003405803075144711, + "loss": 0.4643, + "step": 467 + }, + { + "epoch": 0.9, + "learning_rate": 0.0003402732445777438, + "loss": 0.5435, + "step": 468 + }, + { + "epoch": 0.9, + "learning_rate": 0.00033996552947927744, + "loss": 0.5844, + "step": 469 + }, + { + "epoch": 0.91, + "learning_rate": 0.0003396571636497084, + "loss": 0.5362, + "step": 470 + }, + { + "epoch": 0.91, + "learning_rate": 0.00033934814852269865, + "loss": 0.5607, + "step": 471 + }, + { + "epoch": 0.91, + "learning_rate": 0.0003390384855349285, + "loss": 0.4836, + "step": 472 + }, + { + "epoch": 0.91, + "learning_rate": 0.00033872817612609065, + "loss": 0.6555, + "step": 473 + }, + { + "epoch": 0.91, + "learning_rate": 0.00033841722173888315, + "loss": 0.4784, + "step": 474 + }, + { + "epoch": 0.91, + "learning_rate": 0.00033810562381900253, + "loss": 0.5583, + "step": 475 + }, + { + "epoch": 0.92, + "learning_rate": 0.00033779338381513736, + "loss": 0.4679, + "step": 476 + }, + { + "epoch": 0.92, + "learning_rate": 0.0003374805031789613, + "loss": 0.5325, + "step": 477 + }, + { + "epoch": 0.92, + "learning_rate": 0.00033716698336512654, + "loss": 0.6601, + "step": 478 + }, + { + "epoch": 0.92, + "learning_rate": 0.000336852825831257, + "loss": 0.4838, + "step": 479 + }, + { + "epoch": 0.92, + "learning_rate": 0.0003365380320379414, + "loss": 0.5588, + "step": 480 + }, + { + "epoch": 0.93, + "learning_rate": 0.00033622260344872665, + "loss": 0.4596, + "step": 481 + }, + { + "epoch": 0.93, + "learning_rate": 0.0003359065415301108, + "loss": 0.5228, + "step": 482 + }, + { + "epoch": 0.93, + "learning_rate": 0.00033558984775153663, + "loss": 0.5316, + "step": 483 + }, + { + "epoch": 0.93, + "learning_rate": 0.00033527252358538437, + "loss": 0.4761, + "step": 484 + }, + { + "epoch": 0.93, + "learning_rate": 0.0003349545705069653, + "loss": 0.5254, + "step": 485 + }, + { + "epoch": 0.94, + "learning_rate": 0.0003346359899945144, + "loss": 0.4786, + "step": 486 + }, + { + "epoch": 0.94, + "learning_rate": 0.00033431678352918384, + "loss": 0.4302, + "step": 487 + }, + { + "epoch": 0.94, + "learning_rate": 0.0003339969525950361, + "loss": 0.4914, + "step": 488 + }, + { + "epoch": 0.94, + "learning_rate": 0.00033367649867903663, + "loss": 0.4102, + "step": 489 + }, + { + "epoch": 0.94, + "learning_rate": 0.0003333554232710477, + "loss": 0.4698, + "step": 490 + }, + { + "epoch": 0.95, + "learning_rate": 0.0003330337278638207, + "loss": 0.4454, + "step": 491 + }, + { + "epoch": 0.95, + "learning_rate": 0.00033271141395298964, + "loss": 0.4648, + "step": 492 + }, + { + "epoch": 0.95, + "learning_rate": 0.00033238848303706415, + "loss": 0.4616, + "step": 493 + }, + { + "epoch": 0.95, + "learning_rate": 0.00033206493661742237, + "loss": 0.4861, + "step": 494 + }, + { + "epoch": 0.95, + "learning_rate": 0.00033174077619830416, + "loss": 0.4797, + "step": 495 + }, + { + "epoch": 0.96, + "learning_rate": 0.00033141600328680373, + "loss": 0.5104, + "step": 496 + }, + { + "epoch": 0.96, + "learning_rate": 0.00033109061939286336, + "loss": 0.5712, + "step": 497 + }, + { + "epoch": 0.96, + "learning_rate": 0.00033076462602926553, + "loss": 0.5425, + "step": 498 + }, + { + "epoch": 0.96, + "learning_rate": 0.00033043802471162636, + "loss": 0.6156, + "step": 499 + }, + { + "epoch": 0.96, + "learning_rate": 0.0003301108169583887, + "loss": 0.4282, + "step": 500 + }, + { + "epoch": 0.96, + "learning_rate": 0.0003297830042908146, + "loss": 0.4088, + "step": 501 + }, + { + "epoch": 0.97, + "learning_rate": 0.00032945458823297857, + "loss": 0.4866, + "step": 502 + }, + { + "epoch": 0.97, + "learning_rate": 0.0003291255703117605, + "loss": 0.5045, + "step": 503 + }, + { + "epoch": 0.97, + "learning_rate": 0.0003287959520568384, + "loss": 0.491, + "step": 504 + }, + { + "epoch": 0.97, + "learning_rate": 0.00032846573500068136, + "loss": 0.458, + "step": 505 + }, + { + "epoch": 0.97, + "learning_rate": 0.00032813492067854246, + "loss": 0.4508, + "step": 506 + }, + { + "epoch": 0.98, + "learning_rate": 0.0003278035106284516, + "loss": 0.4294, + "step": 507 + }, + { + "epoch": 0.98, + "learning_rate": 0.00032747150639120834, + "loss": 0.4834, + "step": 508 + }, + { + "epoch": 0.98, + "learning_rate": 0.00032713890951037477, + "loss": 0.3857, + "step": 509 + }, + { + "epoch": 0.98, + "learning_rate": 0.00032680572153226834, + "loss": 0.4072, + "step": 510 + }, + { + "epoch": 0.98, + "learning_rate": 0.0003264719440059545, + "loss": 0.4028, + "step": 511 + }, + { + "epoch": 0.99, + "learning_rate": 0.00032613757848323977, + "loss": 0.3789, + "step": 512 + }, + { + "epoch": 0.99, + "learning_rate": 0.00032580262651866446, + "loss": 0.4944, + "step": 513 + }, + { + "epoch": 0.99, + "learning_rate": 0.0003254670896694952, + "loss": 0.4259, + "step": 514 + }, + { + "epoch": 0.99, + "learning_rate": 0.00032513096949571805, + "loss": 0.5037, + "step": 515 + }, + { + "epoch": 0.99, + "learning_rate": 0.00032479426756003093, + "loss": 0.5857, + "step": 516 + }, + { + "epoch": 1.0, + "learning_rate": 0.0003244569854278366, + "loss": 0.5407, + "step": 517 + }, + { + "epoch": 1.0, + "learning_rate": 0.00032411912466723524, + "loss": 0.499, + "step": 518 + }, + { + "epoch": 1.0, + "learning_rate": 0.0003237806868490172, + "loss": 0.4359, + "step": 519 + }, + { + "epoch": 1.0, + "learning_rate": 0.00032344167354665573, + "loss": 0.4374, + "step": 520 + }, + { + "epoch": 1.0, + "learning_rate": 0.0003231020863362997, + "loss": 0.4172, + "step": 521 + }, + { + "epoch": 1.01, + "learning_rate": 0.000322761926796766, + "loss": 0.4451, + "step": 522 + }, + { + "epoch": 1.01, + "learning_rate": 0.0003224211965095326, + "loss": 0.4, + "step": 523 + }, + { + "epoch": 1.01, + "learning_rate": 0.0003220798970587309, + "loss": 0.4009, + "step": 524 + }, + { + "epoch": 1.01, + "learning_rate": 0.0003217380300311386, + "loss": 0.3966, + "step": 525 + }, + { + "epoch": 1.01, + "learning_rate": 0.000321395597016172, + "loss": 0.4255, + "step": 526 + }, + { + "epoch": 1.01, + "learning_rate": 0.00032105259960587895, + "loss": 0.4707, + "step": 527 + }, + { + "epoch": 1.02, + "learning_rate": 0.00032070903939493124, + "loss": 0.5313, + "step": 528 + }, + { + "epoch": 1.02, + "learning_rate": 0.0003203649179806172, + "loss": 0.3596, + "step": 529 + }, + { + "epoch": 1.02, + "learning_rate": 0.0003200202369628345, + "loss": 0.5223, + "step": 530 + }, + { + "epoch": 1.02, + "learning_rate": 0.00031967499794408234, + "loss": 0.4146, + "step": 531 + }, + { + "epoch": 1.02, + "learning_rate": 0.00031932920252945423, + "loss": 0.4328, + "step": 532 + }, + { + "epoch": 1.03, + "learning_rate": 0.0003189828523266306, + "loss": 0.4258, + "step": 533 + }, + { + "epoch": 1.03, + "learning_rate": 0.00031863594894587105, + "loss": 0.4457, + "step": 534 + }, + { + "epoch": 1.03, + "learning_rate": 0.0003182884940000072, + "loss": 0.5249, + "step": 535 + }, + { + "epoch": 1.03, + "learning_rate": 0.0003179404891044348, + "loss": 0.4751, + "step": 536 + }, + { + "epoch": 1.03, + "learning_rate": 0.00031759193587710676, + "loss": 0.5378, + "step": 537 + }, + { + "epoch": 1.04, + "learning_rate": 0.00031724283593852497, + "loss": 0.634, + "step": 538 + }, + { + "epoch": 1.04, + "learning_rate": 0.00031689319091173326, + "loss": 0.4298, + "step": 539 + }, + { + "epoch": 1.04, + "learning_rate": 0.00031654300242230977, + "loss": 0.5469, + "step": 540 + }, + { + "epoch": 1.04, + "learning_rate": 0.00031619227209835917, + "loss": 0.5153, + "step": 541 + }, + { + "epoch": 1.04, + "learning_rate": 0.0003158410015705053, + "loss": 0.4144, + "step": 542 + }, + { + "epoch": 1.05, + "learning_rate": 0.0003154891924718837, + "loss": 0.6041, + "step": 543 + }, + { + "epoch": 1.05, + "learning_rate": 0.0003151368464381335, + "loss": 0.4891, + "step": 544 + }, + { + "epoch": 1.05, + "learning_rate": 0.0003147839651073904, + "loss": 0.5258, + "step": 545 + }, + { + "epoch": 1.05, + "learning_rate": 0.00031443055012027874, + "loss": 0.4351, + "step": 546 + }, + { + "epoch": 1.05, + "learning_rate": 0.000314076603119904, + "loss": 0.4556, + "step": 547 + }, + { + "epoch": 1.06, + "learning_rate": 0.00031372212575184514, + "loss": 0.5445, + "step": 548 + }, + { + "epoch": 1.06, + "learning_rate": 0.00031336711966414675, + "loss": 0.5585, + "step": 549 + }, + { + "epoch": 1.06, + "learning_rate": 0.0003130115865073117, + "loss": 0.367, + "step": 550 + }, + { + "epoch": 1.06, + "learning_rate": 0.0003126555279342933, + "loss": 0.4877, + "step": 551 + }, + { + "epoch": 1.06, + "learning_rate": 0.0003122989456004876, + "loss": 0.4335, + "step": 552 + }, + { + "epoch": 1.07, + "learning_rate": 0.0003119418411637258, + "loss": 0.4383, + "step": 553 + }, + { + "epoch": 1.07, + "learning_rate": 0.0003115842162842663, + "loss": 0.4508, + "step": 554 + }, + { + "epoch": 1.07, + "learning_rate": 0.00031122607262478743, + "loss": 0.4631, + "step": 555 + }, + { + "epoch": 1.07, + "learning_rate": 0.0003108674118503793, + "loss": 0.3496, + "step": 556 + }, + { + "epoch": 1.07, + "learning_rate": 0.0003105082356285361, + "loss": 0.4108, + "step": 557 + }, + { + "epoch": 1.08, + "learning_rate": 0.0003101485456291486, + "loss": 0.4877, + "step": 558 + }, + { + "epoch": 1.08, + "learning_rate": 0.00030978834352449614, + "loss": 0.3696, + "step": 559 + }, + { + "epoch": 1.08, + "learning_rate": 0.00030942763098923913, + "loss": 0.5138, + "step": 560 + }, + { + "epoch": 1.08, + "learning_rate": 0.00030906640970041084, + "loss": 0.5961, + "step": 561 + }, + { + "epoch": 1.08, + "learning_rate": 0.0003087046813374099, + "loss": 0.3824, + "step": 562 + }, + { + "epoch": 1.08, + "learning_rate": 0.00030834244758199276, + "loss": 0.4925, + "step": 563 + }, + { + "epoch": 1.09, + "learning_rate": 0.000307979710118265, + "loss": 0.4511, + "step": 564 + }, + { + "epoch": 1.09, + "learning_rate": 0.00030761647063267457, + "loss": 0.4306, + "step": 565 + }, + { + "epoch": 1.09, + "learning_rate": 0.0003072527308140031, + "loss": 0.468, + "step": 566 + }, + { + "epoch": 1.09, + "learning_rate": 0.00030688849235335856, + "loss": 0.4842, + "step": 567 + }, + { + "epoch": 1.09, + "learning_rate": 0.0003065237569441671, + "loss": 0.4332, + "step": 568 + }, + { + "epoch": 1.1, + "learning_rate": 0.00030615852628216537, + "loss": 0.4637, + "step": 569 + }, + { + "epoch": 1.1, + "learning_rate": 0.0003057928020653925, + "loss": 0.6193, + "step": 570 + }, + { + "epoch": 1.1, + "learning_rate": 0.0003054265859941824, + "loss": 0.5033, + "step": 571 + }, + { + "epoch": 1.1, + "learning_rate": 0.00030505987977115555, + "loss": 0.4185, + "step": 572 + }, + { + "epoch": 1.1, + "learning_rate": 0.0003046926851012114, + "loss": 0.4211, + "step": 573 + }, + { + "epoch": 1.11, + "learning_rate": 0.0003043250036915201, + "loss": 0.5089, + "step": 574 + }, + { + "epoch": 1.11, + "learning_rate": 0.00030395683725151505, + "loss": 0.517, + "step": 575 + }, + { + "epoch": 1.11, + "learning_rate": 0.0003035881874928845, + "loss": 0.492, + "step": 576 + }, + { + "epoch": 1.11, + "learning_rate": 0.0003032190561295636, + "loss": 0.4535, + "step": 577 + }, + { + "epoch": 1.11, + "learning_rate": 0.0003028494448777269, + "loss": 0.3947, + "step": 578 + }, + { + "epoch": 1.12, + "learning_rate": 0.00030247935545577986, + "loss": 0.3125, + "step": 579 + }, + { + "epoch": 1.12, + "learning_rate": 0.0003021087895843511, + "loss": 0.3882, + "step": 580 + }, + { + "epoch": 1.12, + "learning_rate": 0.0003017377489862845, + "loss": 0.4802, + "step": 581 + }, + { + "epoch": 1.12, + "learning_rate": 0.00030136623538663083, + "loss": 0.4652, + "step": 582 + }, + { + "epoch": 1.12, + "learning_rate": 0.00030099425051263994, + "loss": 0.3816, + "step": 583 + }, + { + "epoch": 1.13, + "learning_rate": 0.0003006217960937529, + "loss": 0.4583, + "step": 584 + }, + { + "epoch": 1.13, + "learning_rate": 0.00030024887386159385, + "loss": 0.4568, + "step": 585 + }, + { + "epoch": 1.13, + "learning_rate": 0.00029987548554996174, + "loss": 0.3908, + "step": 586 + }, + { + "epoch": 1.13, + "learning_rate": 0.0002995016328948225, + "loss": 0.4235, + "step": 587 + }, + { + "epoch": 1.13, + "learning_rate": 0.00029912731763430075, + "loss": 0.4138, + "step": 588 + }, + { + "epoch": 1.13, + "learning_rate": 0.00029875254150867216, + "loss": 0.5838, + "step": 589 + }, + { + "epoch": 1.14, + "learning_rate": 0.0002983773062603548, + "loss": 0.462, + "step": 590 + }, + { + "epoch": 1.14, + "learning_rate": 0.00029800161363390145, + "loss": 0.4632, + "step": 591 + }, + { + "epoch": 1.14, + "learning_rate": 0.00029762546537599125, + "loss": 0.5898, + "step": 592 + }, + { + "epoch": 1.14, + "learning_rate": 0.0002972488632354218, + "loss": 0.4742, + "step": 593 + }, + { + "epoch": 1.14, + "learning_rate": 0.00029687180896310065, + "loss": 0.4579, + "step": 594 + }, + { + "epoch": 1.15, + "learning_rate": 0.0002964943043120378, + "loss": 0.5514, + "step": 595 + }, + { + "epoch": 1.15, + "learning_rate": 0.00029611635103733675, + "loss": 0.4304, + "step": 596 + }, + { + "epoch": 1.15, + "learning_rate": 0.0002957379508961871, + "loss": 0.4383, + "step": 597 + }, + { + "epoch": 1.15, + "learning_rate": 0.00029535910564785584, + "loss": 0.5327, + "step": 598 + }, + { + "epoch": 1.15, + "learning_rate": 0.00029497981705367933, + "loss": 0.4781, + "step": 599 + }, + { + "epoch": 1.16, + "learning_rate": 0.00029460008687705525, + "loss": 0.4178, + "step": 600 + }, + { + "epoch": 1.16, + "learning_rate": 0.0002942199168834342, + "loss": 0.3987, + "step": 601 + }, + { + "epoch": 1.16, + "learning_rate": 0.00029383930884031183, + "loss": 0.3861, + "step": 602 + }, + { + "epoch": 1.16, + "learning_rate": 0.00029345826451722005, + "loss": 0.5322, + "step": 603 + }, + { + "epoch": 1.16, + "learning_rate": 0.00029307678568571936, + "loss": 0.3997, + "step": 604 + }, + { + "epoch": 1.17, + "learning_rate": 0.0002926948741193903, + "loss": 0.4121, + "step": 605 + }, + { + "epoch": 1.17, + "learning_rate": 0.00029231253159382514, + "loss": 0.4931, + "step": 606 + }, + { + "epoch": 1.17, + "learning_rate": 0.00029192975988662017, + "loss": 0.4626, + "step": 607 + }, + { + "epoch": 1.17, + "learning_rate": 0.00029154656077736666, + "loss": 0.4441, + "step": 608 + }, + { + "epoch": 1.17, + "learning_rate": 0.0002911629360476432, + "loss": 0.3863, + "step": 609 + }, + { + "epoch": 1.18, + "learning_rate": 0.00029077888748100703, + "loss": 0.36, + "step": 610 + }, + { + "epoch": 1.18, + "learning_rate": 0.00029039441686298594, + "loss": 0.4246, + "step": 611 + }, + { + "epoch": 1.18, + "learning_rate": 0.0002900095259810702, + "loss": 0.4916, + "step": 612 + }, + { + "epoch": 1.18, + "learning_rate": 0.00028962421662470346, + "loss": 0.4896, + "step": 613 + }, + { + "epoch": 1.18, + "learning_rate": 0.00028923849058527535, + "loss": 0.4237, + "step": 614 + }, + { + "epoch": 1.18, + "learning_rate": 0.00028885234965611274, + "loss": 0.5727, + "step": 615 + }, + { + "epoch": 1.19, + "learning_rate": 0.00028846579563247116, + "loss": 0.5681, + "step": 616 + }, + { + "epoch": 1.19, + "learning_rate": 0.0002880788303115269, + "loss": 0.4383, + "step": 617 + }, + { + "epoch": 1.19, + "learning_rate": 0.00028769145549236845, + "loss": 0.4962, + "step": 618 + }, + { + "epoch": 1.19, + "learning_rate": 0.0002873036729759881, + "loss": 0.5472, + "step": 619 + }, + { + "epoch": 1.19, + "learning_rate": 0.0002869154845652738, + "loss": 0.5431, + "step": 620 + }, + { + "epoch": 1.2, + "learning_rate": 0.0002865268920650003, + "loss": 0.4152, + "step": 621 + }, + { + "epoch": 1.2, + "learning_rate": 0.0002861378972818211, + "loss": 0.3922, + "step": 622 + }, + { + "epoch": 1.2, + "learning_rate": 0.0002857485020242602, + "loss": 0.5129, + "step": 623 + }, + { + "epoch": 1.2, + "learning_rate": 0.0002853587081027034, + "loss": 0.4328, + "step": 624 + }, + { + "epoch": 1.2, + "learning_rate": 0.00028496851732938997, + "loss": 0.4431, + "step": 625 + }, + { + "epoch": 1.21, + "learning_rate": 0.0002845779315184042, + "loss": 0.4968, + "step": 626 + }, + { + "epoch": 1.21, + "learning_rate": 0.000284186952485667, + "loss": 0.5301, + "step": 627 + }, + { + "epoch": 1.21, + "learning_rate": 0.0002837955820489276, + "loss": 0.4332, + "step": 628 + }, + { + "epoch": 1.21, + "learning_rate": 0.0002834038220277546, + "loss": 0.4245, + "step": 629 + }, + { + "epoch": 1.21, + "learning_rate": 0.00028301167424352836, + "loss": 0.5057, + "step": 630 + }, + { + "epoch": 1.22, + "learning_rate": 0.00028261914051943166, + "loss": 0.4623, + "step": 631 + }, + { + "epoch": 1.22, + "learning_rate": 0.00028222622268044174, + "loss": 0.5452, + "step": 632 + }, + { + "epoch": 1.22, + "learning_rate": 0.00028183292255332164, + "loss": 0.5238, + "step": 633 + }, + { + "epoch": 1.22, + "learning_rate": 0.00028143924196661176, + "loss": 0.3966, + "step": 634 + }, + { + "epoch": 1.22, + "learning_rate": 0.0002810451827506214, + "loss": 0.35, + "step": 635 + }, + { + "epoch": 1.23, + "learning_rate": 0.00028065074673742007, + "loss": 0.4325, + "step": 636 + }, + { + "epoch": 1.23, + "learning_rate": 0.0002802559357608292, + "loss": 0.4854, + "step": 637 + }, + { + "epoch": 1.23, + "learning_rate": 0.00027986075165641343, + "loss": 0.4254, + "step": 638 + }, + { + "epoch": 1.23, + "learning_rate": 0.00027946519626147225, + "loss": 0.4614, + "step": 639 + }, + { + "epoch": 1.23, + "learning_rate": 0.00027906927141503125, + "loss": 0.3798, + "step": 640 + }, + { + "epoch": 1.23, + "learning_rate": 0.00027867297895783373, + "loss": 0.4742, + "step": 641 + }, + { + "epoch": 1.24, + "learning_rate": 0.0002782763207323322, + "loss": 0.4007, + "step": 642 + }, + { + "epoch": 1.24, + "learning_rate": 0.0002778792985826795, + "loss": 0.4383, + "step": 643 + }, + { + "epoch": 1.24, + "learning_rate": 0.0002774819143547206, + "loss": 0.4298, + "step": 644 + }, + { + "epoch": 1.24, + "learning_rate": 0.00027708416989598387, + "loss": 0.5178, + "step": 645 + }, + { + "epoch": 1.24, + "learning_rate": 0.0002766860670556722, + "loss": 0.3434, + "step": 646 + }, + { + "epoch": 1.25, + "learning_rate": 0.0002762876076846551, + "loss": 0.3862, + "step": 647 + }, + { + "epoch": 1.25, + "learning_rate": 0.00027588879363545934, + "loss": 0.4459, + "step": 648 + }, + { + "epoch": 1.25, + "learning_rate": 0.0002754896267622608, + "loss": 0.3934, + "step": 649 + }, + { + "epoch": 1.25, + "learning_rate": 0.00027509010892087565, + "loss": 0.4349, + "step": 650 + }, + { + "epoch": 1.25, + "learning_rate": 0.000274690241968752, + "loss": 0.4178, + "step": 651 + }, + { + "epoch": 1.26, + "learning_rate": 0.0002742900277649607, + "loss": 0.4151, + "step": 652 + }, + { + "epoch": 1.26, + "learning_rate": 0.0002738894681701874, + "loss": 0.3888, + "step": 653 + }, + { + "epoch": 1.26, + "learning_rate": 0.00027348856504672323, + "loss": 0.4214, + "step": 654 + }, + { + "epoch": 1.26, + "learning_rate": 0.0002730873202584567, + "loss": 0.519, + "step": 655 + }, + { + "epoch": 1.26, + "learning_rate": 0.00027268573567086477, + "loss": 0.5463, + "step": 656 + }, + { + "epoch": 1.27, + "learning_rate": 0.00027228381315100417, + "loss": 0.3367, + "step": 657 + }, + { + "epoch": 1.27, + "learning_rate": 0.00027188155456750256, + "loss": 0.4629, + "step": 658 + }, + { + "epoch": 1.27, + "learning_rate": 0.00027147896179055043, + "loss": 0.4456, + "step": 659 + }, + { + "epoch": 1.27, + "learning_rate": 0.0002710760366918917, + "loss": 0.4348, + "step": 660 + }, + { + "epoch": 1.27, + "learning_rate": 0.0002706727811448153, + "loss": 0.4505, + "step": 661 + }, + { + "epoch": 1.28, + "learning_rate": 0.0002702691970241468, + "loss": 0.5028, + "step": 662 + }, + { + "epoch": 1.28, + "learning_rate": 0.00026986528620623904, + "loss": 0.5257, + "step": 663 + }, + { + "epoch": 1.28, + "learning_rate": 0.00026946105056896403, + "loss": 0.4977, + "step": 664 + }, + { + "epoch": 1.28, + "learning_rate": 0.00026905649199170377, + "loss": 0.421, + "step": 665 + }, + { + "epoch": 1.28, + "learning_rate": 0.0002686516123553417, + "loss": 0.4931, + "step": 666 + }, + { + "epoch": 1.28, + "learning_rate": 0.00026824641354225397, + "loss": 0.5818, + "step": 667 + }, + { + "epoch": 1.29, + "learning_rate": 0.0002678408974363005, + "loss": 0.4211, + "step": 668 + }, + { + "epoch": 1.29, + "learning_rate": 0.00026743506592281674, + "loss": 0.5182, + "step": 669 + }, + { + "epoch": 1.29, + "learning_rate": 0.00026702892088860413, + "loss": 0.5591, + "step": 670 + }, + { + "epoch": 1.29, + "learning_rate": 0.0002666224642219221, + "loss": 0.5363, + "step": 671 + }, + { + "epoch": 1.29, + "learning_rate": 0.0002662156978124786, + "loss": 0.5866, + "step": 672 + }, + { + "epoch": 1.3, + "learning_rate": 0.0002658086235514218, + "loss": 0.422, + "step": 673 + }, + { + "epoch": 1.3, + "learning_rate": 0.0002654012433313312, + "loss": 0.5375, + "step": 674 + }, + { + "epoch": 1.3, + "learning_rate": 0.0002649935590462087, + "loss": 0.4752, + "step": 675 + }, + { + "epoch": 1.3, + "learning_rate": 0.00026458557259146986, + "loss": 0.4271, + "step": 676 + }, + { + "epoch": 1.3, + "learning_rate": 0.0002641772858639351, + "loss": 0.4843, + "step": 677 + }, + { + "epoch": 1.31, + "learning_rate": 0.00026376870076182086, + "loss": 0.4827, + "step": 678 + }, + { + "epoch": 1.31, + "learning_rate": 0.00026335981918473086, + "loss": 0.47, + "step": 679 + }, + { + "epoch": 1.31, + "learning_rate": 0.0002629506430336472, + "loss": 0.368, + "step": 680 + }, + { + "epoch": 1.31, + "learning_rate": 0.00026254117421092133, + "loss": 0.481, + "step": 681 + }, + { + "epoch": 1.31, + "learning_rate": 0.0002621314146202656, + "loss": 0.4153, + "step": 682 + }, + { + "epoch": 1.32, + "learning_rate": 0.0002617213661667443, + "loss": 0.4397, + "step": 683 + }, + { + "epoch": 1.32, + "learning_rate": 0.0002613110307567643, + "loss": 0.4052, + "step": 684 + }, + { + "epoch": 1.32, + "learning_rate": 0.00026090041029806695, + "loss": 0.4652, + "step": 685 + }, + { + "epoch": 1.32, + "learning_rate": 0.00026048950669971884, + "loss": 0.3826, + "step": 686 + }, + { + "epoch": 1.32, + "learning_rate": 0.00026007832187210277, + "loss": 0.5639, + "step": 687 + }, + { + "epoch": 1.33, + "learning_rate": 0.00025966685772690906, + "loss": 0.3917, + "step": 688 + }, + { + "epoch": 1.33, + "learning_rate": 0.00025925511617712685, + "loss": 0.5248, + "step": 689 + }, + { + "epoch": 1.33, + "learning_rate": 0.0002588430991370347, + "loss": 0.3796, + "step": 690 + }, + { + "epoch": 1.33, + "learning_rate": 0.0002584308085221922, + "loss": 0.4391, + "step": 691 + }, + { + "epoch": 1.33, + "learning_rate": 0.00025801824624943084, + "loss": 0.4514, + "step": 692 + }, + { + "epoch": 1.34, + "learning_rate": 0.00025760541423684496, + "loss": 0.5046, + "step": 693 + }, + { + "epoch": 1.34, + "learning_rate": 0.0002571923144037831, + "loss": 0.4578, + "step": 694 + }, + { + "epoch": 1.34, + "learning_rate": 0.0002567789486708389, + "loss": 0.4681, + "step": 695 + }, + { + "epoch": 1.34, + "learning_rate": 0.00025636531895984236, + "loss": 0.4501, + "step": 696 + }, + { + "epoch": 1.34, + "learning_rate": 0.0002559514271938506, + "loss": 0.4411, + "step": 697 + }, + { + "epoch": 1.34, + "learning_rate": 0.00025553727529713916, + "loss": 0.401, + "step": 698 + }, + { + "epoch": 1.35, + "learning_rate": 0.00025512286519519293, + "loss": 0.4911, + "step": 699 + }, + { + "epoch": 1.35, + "learning_rate": 0.0002547081988146974, + "loss": 0.3754, + "step": 700 + }, + { + "epoch": 1.35, + "learning_rate": 0.00025429327808352946, + "loss": 0.3807, + "step": 701 + }, + { + "epoch": 1.35, + "learning_rate": 0.0002538781049307486, + "loss": 0.4193, + "step": 702 + }, + { + "epoch": 1.35, + "learning_rate": 0.0002534626812865876, + "loss": 0.5259, + "step": 703 + }, + { + "epoch": 1.36, + "learning_rate": 0.00025304700908244433, + "loss": 0.3684, + "step": 704 + }, + { + "epoch": 1.36, + "learning_rate": 0.0002526310902508718, + "loss": 0.5423, + "step": 705 + }, + { + "epoch": 1.36, + "learning_rate": 0.0002522149267255699, + "loss": 0.4288, + "step": 706 + }, + { + "epoch": 1.36, + "learning_rate": 0.000251798520441376, + "loss": 0.5046, + "step": 707 + }, + { + "epoch": 1.36, + "learning_rate": 0.0002513818733342564, + "loss": 0.3777, + "step": 708 + }, + { + "epoch": 1.37, + "learning_rate": 0.00025096498734129667, + "loss": 0.5171, + "step": 709 + }, + { + "epoch": 1.37, + "learning_rate": 0.0002505478644006932, + "loss": 0.3785, + "step": 710 + }, + { + "epoch": 1.37, + "learning_rate": 0.00025013050645174414, + "loss": 0.5413, + "step": 711 + }, + { + "epoch": 1.37, + "learning_rate": 0.00024971291543483994, + "loss": 0.5018, + "step": 712 + }, + { + "epoch": 1.37, + "learning_rate": 0.00024929509329145477, + "loss": 0.5212, + "step": 713 + }, + { + "epoch": 1.38, + "learning_rate": 0.00024887704196413746, + "loss": 0.483, + "step": 714 + }, + { + "epoch": 1.38, + "learning_rate": 0.0002484587633965023, + "loss": 0.3684, + "step": 715 + }, + { + "epoch": 1.38, + "learning_rate": 0.00024804025953322005, + "loss": 0.3782, + "step": 716 + }, + { + "epoch": 1.38, + "learning_rate": 0.00024762153232000877, + "loss": 0.4995, + "step": 717 + }, + { + "epoch": 1.38, + "learning_rate": 0.0002472025837036253, + "loss": 0.4324, + "step": 718 + }, + { + "epoch": 1.39, + "learning_rate": 0.0002467834156318555, + "loss": 0.5203, + "step": 719 + }, + { + "epoch": 1.39, + "learning_rate": 0.0002463640300535057, + "loss": 0.423, + "step": 720 + }, + { + "epoch": 1.39, + "learning_rate": 0.0002459444289183933, + "loss": 0.4537, + "step": 721 + }, + { + "epoch": 1.39, + "learning_rate": 0.00024552461417733817, + "loss": 0.4124, + "step": 722 + }, + { + "epoch": 1.39, + "learning_rate": 0.0002451045877821528, + "loss": 0.4865, + "step": 723 + }, + { + "epoch": 1.39, + "learning_rate": 0.0002446843516856343, + "loss": 0.4845, + "step": 724 + }, + { + "epoch": 1.4, + "learning_rate": 0.00024426390784155425, + "loss": 0.4174, + "step": 725 + }, + { + "epoch": 1.4, + "learning_rate": 0.00024384325820465033, + "loss": 0.4456, + "step": 726 + }, + { + "epoch": 1.4, + "learning_rate": 0.0002434224047306169, + "loss": 0.4429, + "step": 727 + }, + { + "epoch": 1.4, + "learning_rate": 0.0002430013493760961, + "loss": 0.363, + "step": 728 + }, + { + "epoch": 1.4, + "learning_rate": 0.00024258009409866853, + "loss": 0.4769, + "step": 729 + }, + { + "epoch": 1.41, + "learning_rate": 0.00024215864085684442, + "loss": 0.4597, + "step": 730 + }, + { + "epoch": 1.41, + "learning_rate": 0.00024173699161005429, + "loss": 0.366, + "step": 731 + }, + { + "epoch": 1.41, + "learning_rate": 0.00024131514831863995, + "loss": 0.4746, + "step": 732 + }, + { + "epoch": 1.41, + "learning_rate": 0.0002408931129438453, + "loss": 0.5608, + "step": 733 + }, + { + "epoch": 1.41, + "learning_rate": 0.00024047088744780744, + "loss": 0.4292, + "step": 734 + }, + { + "epoch": 1.42, + "learning_rate": 0.00024004847379354726, + "loss": 0.4743, + "step": 735 + }, + { + "epoch": 1.42, + "learning_rate": 0.00023962587394496038, + "loss": 0.3855, + "step": 736 + }, + { + "epoch": 1.42, + "learning_rate": 0.00023920308986680834, + "loss": 0.4573, + "step": 737 + }, + { + "epoch": 1.42, + "learning_rate": 0.00023878012352470892, + "loss": 0.3937, + "step": 738 + }, + { + "epoch": 1.42, + "learning_rate": 0.0002383569768851274, + "loss": 0.4371, + "step": 739 + }, + { + "epoch": 1.43, + "learning_rate": 0.00023793365191536735, + "loss": 0.5432, + "step": 740 + }, + { + "epoch": 1.43, + "learning_rate": 0.00023751015058356135, + "loss": 0.4803, + "step": 741 + }, + { + "epoch": 1.43, + "learning_rate": 0.000237086474858662, + "loss": 0.4281, + "step": 742 + }, + { + "epoch": 1.43, + "learning_rate": 0.00023666262671043263, + "loss": 0.4031, + "step": 743 + }, + { + "epoch": 1.43, + "learning_rate": 0.00023623860810943826, + "loss": 0.4725, + "step": 744 + }, + { + "epoch": 1.44, + "learning_rate": 0.0002358144210270364, + "loss": 0.4644, + "step": 745 + }, + { + "epoch": 1.44, + "learning_rate": 0.00023539006743536774, + "loss": 0.4848, + "step": 746 + }, + { + "epoch": 1.44, + "learning_rate": 0.00023496554930734718, + "loss": 0.4084, + "step": 747 + }, + { + "epoch": 1.44, + "learning_rate": 0.00023454086861665472, + "loss": 0.4322, + "step": 748 + }, + { + "epoch": 1.44, + "learning_rate": 0.00023411602733772595, + "loss": 0.4847, + "step": 749 + }, + { + "epoch": 1.44, + "learning_rate": 0.00023369102744574312, + "loss": 0.4298, + "step": 750 + }, + { + "epoch": 1.45, + "learning_rate": 0.00023326587091662603, + "loss": 0.4268, + "step": 751 + }, + { + "epoch": 1.45, + "learning_rate": 0.00023284055972702254, + "loss": 0.4089, + "step": 752 + }, + { + "epoch": 1.45, + "learning_rate": 0.0002324150958542997, + "loss": 0.4214, + "step": 753 + }, + { + "epoch": 1.45, + "learning_rate": 0.00023198948127653446, + "loss": 0.5576, + "step": 754 + }, + { + "epoch": 1.45, + "learning_rate": 0.00023156371797250418, + "loss": 0.4377, + "step": 755 + }, + { + "epoch": 1.46, + "learning_rate": 0.00023113780792167785, + "loss": 0.4934, + "step": 756 + }, + { + "epoch": 1.46, + "learning_rate": 0.0002307117531042068, + "loss": 0.3698, + "step": 757 + }, + { + "epoch": 1.46, + "learning_rate": 0.00023028555550091536, + "loss": 0.4722, + "step": 758 + }, + { + "epoch": 1.46, + "learning_rate": 0.00022985921709329157, + "loss": 0.3837, + "step": 759 + }, + { + "epoch": 1.46, + "learning_rate": 0.00022943273986347822, + "loss": 0.5132, + "step": 760 + }, + { + "epoch": 1.47, + "learning_rate": 0.0002290061257942635, + "loss": 0.487, + "step": 761 + }, + { + "epoch": 1.47, + "learning_rate": 0.00022857937686907183, + "loss": 0.3857, + "step": 762 + }, + { + "epoch": 1.47, + "learning_rate": 0.00022815249507195445, + "loss": 0.4135, + "step": 763 + }, + { + "epoch": 1.47, + "learning_rate": 0.00022772548238758064, + "loss": 0.4639, + "step": 764 + }, + { + "epoch": 1.47, + "learning_rate": 0.00022729834080122791, + "loss": 0.5297, + "step": 765 + }, + { + "epoch": 1.48, + "learning_rate": 0.00022687107229877324, + "loss": 0.4485, + "step": 766 + }, + { + "epoch": 1.48, + "learning_rate": 0.00022644367886668357, + "loss": 0.467, + "step": 767 + }, + { + "epoch": 1.48, + "learning_rate": 0.00022601616249200675, + "loss": 0.4304, + "step": 768 + }, + { + "epoch": 1.48, + "learning_rate": 0.00022558852516236217, + "loss": 0.5531, + "step": 769 + }, + { + "epoch": 1.48, + "learning_rate": 0.00022516076886593158, + "loss": 0.5021, + "step": 770 + }, + { + "epoch": 1.49, + "learning_rate": 0.00022473289559144988, + "loss": 0.433, + "step": 771 + }, + { + "epoch": 1.49, + "learning_rate": 0.00022430490732819566, + "loss": 0.505, + "step": 772 + }, + { + "epoch": 1.49, + "learning_rate": 0.00022387680606598235, + "loss": 0.4677, + "step": 773 + }, + { + "epoch": 1.49, + "learning_rate": 0.00022344859379514858, + "loss": 0.4421, + "step": 774 + }, + { + "epoch": 1.49, + "learning_rate": 0.00022302027250654905, + "loss": 0.4282, + "step": 775 + }, + { + "epoch": 1.49, + "learning_rate": 0.0002225918441915456, + "loss": 0.366, + "step": 776 + }, + { + "epoch": 1.5, + "learning_rate": 0.00022216331084199724, + "loss": 0.4147, + "step": 777 + }, + { + "epoch": 1.5, + "learning_rate": 0.00022173467445025158, + "loss": 0.586, + "step": 778 + }, + { + "epoch": 1.5, + "learning_rate": 0.00022130593700913522, + "loss": 0.5285, + "step": 779 + }, + { + "epoch": 1.5, + "learning_rate": 0.00022087710051194463, + "loss": 0.4484, + "step": 780 + }, + { + "epoch": 1.5, + "learning_rate": 0.0002204481669524367, + "loss": 0.4063, + "step": 781 + }, + { + "epoch": 1.51, + "learning_rate": 0.0002200191383248197, + "loss": 0.4751, + "step": 782 + }, + { + "epoch": 1.51, + "learning_rate": 0.00021959001662374373, + "loss": 0.3936, + "step": 783 + }, + { + "epoch": 1.51, + "learning_rate": 0.00021916080384429184, + "loss": 0.4433, + "step": 784 + }, + { + "epoch": 1.51, + "learning_rate": 0.0002187315019819703, + "loss": 0.4883, + "step": 785 + }, + { + "epoch": 1.51, + "learning_rate": 0.00021830211303269965, + "loss": 0.4925, + "step": 786 + }, + { + "epoch": 1.52, + "learning_rate": 0.00021787263899280537, + "loss": 0.4597, + "step": 787 + }, + { + "epoch": 1.52, + "learning_rate": 0.00021744308185900848, + "loss": 0.4954, + "step": 788 + }, + { + "epoch": 1.52, + "learning_rate": 0.00021701344362841626, + "loss": 0.4025, + "step": 789 + }, + { + "epoch": 1.52, + "learning_rate": 0.00021658372629851318, + "loss": 0.5734, + "step": 790 + }, + { + "epoch": 1.52, + "learning_rate": 0.00021615393186715128, + "loss": 0.3779, + "step": 791 + }, + { + "epoch": 1.53, + "learning_rate": 0.00021572406233254116, + "loss": 0.4994, + "step": 792 + }, + { + "epoch": 1.53, + "learning_rate": 0.00021529411969324275, + "loss": 0.5359, + "step": 793 + }, + { + "epoch": 1.53, + "learning_rate": 0.00021486410594815554, + "loss": 0.4738, + "step": 794 + }, + { + "epoch": 1.53, + "learning_rate": 0.00021443402309650979, + "loss": 0.4915, + "step": 795 + }, + { + "epoch": 1.53, + "learning_rate": 0.00021400387313785704, + "loss": 0.4991, + "step": 796 + }, + { + "epoch": 1.54, + "learning_rate": 0.00021357365807206087, + "loss": 0.4503, + "step": 797 + }, + { + "epoch": 1.54, + "learning_rate": 0.0002131433798992874, + "loss": 0.4887, + "step": 798 + }, + { + "epoch": 1.54, + "learning_rate": 0.00021271304061999633, + "loss": 0.4279, + "step": 799 + }, + { + "epoch": 1.54, + "learning_rate": 0.00021228264223493139, + "loss": 0.5367, + "step": 800 + } + ], + "logging_steps": 1, + "max_steps": 1557, + "num_train_epochs": 3, + "save_steps": 50, + "total_flos": 1.0713923068693709e+18, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-800/training_args.bin b/checkpoint-800/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..cbdf20491848d40e9a89bca19c6229b4b2b55e5d --- /dev/null +++ b/checkpoint-800/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:07819caee47f45203545f962678d52b5954ff1fd4afe1d5152fad48004402099 +size 4155 diff --git a/checkpoint-850/README.md b/checkpoint-850/README.md new file mode 100644 index 0000000000000000000000000000000000000000..b32efe7366f05d1d90816d2ad9e4b06ccca46bea --- /dev/null +++ b/checkpoint-850/README.md @@ -0,0 +1,219 @@ +--- +library_name: peft +base_model: mistralai/Mistral-7B-v0.1 +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] + + +## Training procedure + + +The following `bitsandbytes` quantization config was used during training: +- quant_method: bitsandbytes +- load_in_8bit: False +- load_in_4bit: True +- llm_int8_threshold: 6.0 +- llm_int8_skip_modules: None +- llm_int8_enable_fp32_cpu_offload: False +- llm_int8_has_fp16_weight: False +- bnb_4bit_quant_type: nf4 +- bnb_4bit_use_double_quant: True +- bnb_4bit_compute_dtype: float16 + +### Framework versions + + +- PEFT 0.6.0.dev0 diff --git a/checkpoint-850/adapter_config.json b/checkpoint-850/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..4e108f2da037ef6250457c67a4bedd308d97303c --- /dev/null +++ b/checkpoint-850/adapter_config.json @@ -0,0 +1,24 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "mistralai/Mistral-7B-v0.1", + "bias": "none", + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layers_pattern": null, + "layers_to_transform": null, + "lora_alpha": 16, + "lora_dropout": 0.05, + "modules_to_save": null, + "peft_type": "LORA", + "r": 16, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "gate_proj", + "down_proj", + "up_proj" + ], + "task_type": "CAUSAL_LM" +} \ No newline at end of file diff --git a/checkpoint-850/adapter_model.bin b/checkpoint-850/adapter_model.bin new file mode 100644 index 0000000000000000000000000000000000000000..a1a5b6b7fe927689d66fc8724b5a5d06cf9c7607 --- /dev/null +++ b/checkpoint-850/adapter_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f834c68c70315b5fce9b1dc11afe9e89264c74d708a40b82ee8eea83de9b37f7 +size 113314765 diff --git a/checkpoint-850/optimizer.pt b/checkpoint-850/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..388fb19977c5ad24dd58b9a0fc1afd4b15610f91 --- /dev/null +++ b/checkpoint-850/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e4b5c5aea4dee5474f182bc612310d2a53e1f96beba93053df1b1b96348ae641 +size 226653957 diff --git a/checkpoint-850/rng_state.pth b/checkpoint-850/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..5079e140bc6f3bee99ea24435ba209cd8c7305fd --- /dev/null +++ b/checkpoint-850/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:515e6d64496266b6c21c0d830c6738ebd8fcdfe672c3416d33ba0ae2c7eaa603 +size 14575 diff --git a/checkpoint-850/scheduler.pt b/checkpoint-850/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..66e71dbbf1209b1a1626639ebaad37283b8ac3d8 --- /dev/null +++ b/checkpoint-850/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d42c783ab5a07507f0b1f8628363ba986357d070d6f9196f44a6f8c605489906 +size 627 diff --git a/checkpoint-850/trainer_state.json b/checkpoint-850/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..0e8f913a77ddc6013eeadb47d4c64c93aa387212 --- /dev/null +++ b/checkpoint-850/trainer_state.json @@ -0,0 +1,5119 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 1.6373811529666626, + "eval_steps": 500, + "global_step": 850, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0, + "learning_rate": 4.000000000000001e-06, + "loss": 0.6869, + "step": 1 + }, + { + "epoch": 0.0, + "learning_rate": 8.000000000000001e-06, + "loss": 0.8396, + "step": 2 + }, + { + "epoch": 0.01, + "learning_rate": 1.2e-05, + "loss": 0.7489, + "step": 3 + }, + { + "epoch": 0.01, + "learning_rate": 1.6000000000000003e-05, + "loss": 0.7252, + "step": 4 + }, + { + "epoch": 0.01, + "learning_rate": 2e-05, + "loss": 0.6548, + "step": 5 + }, + { + "epoch": 0.01, + "learning_rate": 2.4e-05, + "loss": 0.8022, + "step": 6 + }, + { + "epoch": 0.01, + "learning_rate": 2.8000000000000003e-05, + "loss": 0.6524, + "step": 7 + }, + { + "epoch": 0.02, + "learning_rate": 3.2000000000000005e-05, + "loss": 0.6981, + "step": 8 + }, + { + "epoch": 0.02, + "learning_rate": 3.6e-05, + "loss": 0.7488, + "step": 9 + }, + { + "epoch": 0.02, + "learning_rate": 4e-05, + "loss": 0.6368, + "step": 10 + }, + { + "epoch": 0.02, + "learning_rate": 4.4000000000000006e-05, + "loss": 0.6891, + "step": 11 + }, + { + "epoch": 0.02, + "learning_rate": 4.8e-05, + "loss": 0.7968, + "step": 12 + }, + { + "epoch": 0.03, + "learning_rate": 5.2000000000000004e-05, + "loss": 0.6912, + "step": 13 + }, + { + "epoch": 0.03, + "learning_rate": 5.6000000000000006e-05, + "loss": 0.8452, + "step": 14 + }, + { + "epoch": 0.03, + "learning_rate": 6e-05, + "loss": 0.6989, + "step": 15 + }, + { + "epoch": 0.03, + "learning_rate": 6.400000000000001e-05, + "loss": 0.6685, + "step": 16 + }, + { + "epoch": 0.03, + "learning_rate": 6.800000000000001e-05, + "loss": 0.5469, + "step": 17 + }, + { + "epoch": 0.03, + "learning_rate": 7.2e-05, + "loss": 0.7915, + "step": 18 + }, + { + "epoch": 0.04, + "learning_rate": 7.6e-05, + "loss": 0.7744, + "step": 19 + }, + { + "epoch": 0.04, + "learning_rate": 8e-05, + "loss": 0.6804, + "step": 20 + }, + { + "epoch": 0.04, + "learning_rate": 8.4e-05, + "loss": 0.7796, + "step": 21 + }, + { + "epoch": 0.04, + "learning_rate": 8.800000000000001e-05, + "loss": 0.706, + "step": 22 + }, + { + "epoch": 0.04, + "learning_rate": 9.200000000000001e-05, + "loss": 0.6798, + "step": 23 + }, + { + "epoch": 0.05, + "learning_rate": 9.6e-05, + "loss": 0.6333, + "step": 24 + }, + { + "epoch": 0.05, + "learning_rate": 0.0001, + "loss": 0.6012, + "step": 25 + }, + { + "epoch": 0.05, + "learning_rate": 0.00010400000000000001, + "loss": 0.52, + "step": 26 + }, + { + "epoch": 0.05, + "learning_rate": 0.00010800000000000001, + "loss": 0.6583, + "step": 27 + }, + { + "epoch": 0.05, + "learning_rate": 0.00011200000000000001, + "loss": 0.7354, + "step": 28 + }, + { + "epoch": 0.06, + "learning_rate": 0.000116, + "loss": 0.6296, + "step": 29 + }, + { + "epoch": 0.06, + "learning_rate": 0.00012, + "loss": 0.6352, + "step": 30 + }, + { + "epoch": 0.06, + "learning_rate": 0.000124, + "loss": 0.6007, + "step": 31 + }, + { + "epoch": 0.06, + "learning_rate": 0.00012800000000000002, + "loss": 0.5659, + "step": 32 + }, + { + "epoch": 0.06, + "learning_rate": 0.000132, + "loss": 0.5138, + "step": 33 + }, + { + "epoch": 0.07, + "learning_rate": 0.00013600000000000003, + "loss": 0.6639, + "step": 34 + }, + { + "epoch": 0.07, + "learning_rate": 0.00014, + "loss": 0.5934, + "step": 35 + }, + { + "epoch": 0.07, + "learning_rate": 0.000144, + "loss": 0.5233, + "step": 36 + }, + { + "epoch": 0.07, + "learning_rate": 0.000148, + "loss": 0.5307, + "step": 37 + }, + { + "epoch": 0.07, + "learning_rate": 0.000152, + "loss": 0.5928, + "step": 38 + }, + { + "epoch": 0.08, + "learning_rate": 0.00015600000000000002, + "loss": 0.5908, + "step": 39 + }, + { + "epoch": 0.08, + "learning_rate": 0.00016, + "loss": 0.6366, + "step": 40 + }, + { + "epoch": 0.08, + "learning_rate": 0.000164, + "loss": 0.5972, + "step": 41 + }, + { + "epoch": 0.08, + "learning_rate": 0.000168, + "loss": 0.4825, + "step": 42 + }, + { + "epoch": 0.08, + "learning_rate": 0.000172, + "loss": 0.6783, + "step": 43 + }, + { + "epoch": 0.08, + "learning_rate": 0.00017600000000000002, + "loss": 0.6082, + "step": 44 + }, + { + "epoch": 0.09, + "learning_rate": 0.00018, + "loss": 0.7633, + "step": 45 + }, + { + "epoch": 0.09, + "learning_rate": 0.00018400000000000003, + "loss": 0.5988, + "step": 46 + }, + { + "epoch": 0.09, + "learning_rate": 0.000188, + "loss": 0.6658, + "step": 47 + }, + { + "epoch": 0.09, + "learning_rate": 0.000192, + "loss": 0.5945, + "step": 48 + }, + { + "epoch": 0.09, + "learning_rate": 0.000196, + "loss": 0.5984, + "step": 49 + }, + { + "epoch": 0.1, + "learning_rate": 0.0002, + "loss": 0.6778, + "step": 50 + }, + { + "epoch": 0.1, + "learning_rate": 0.00020400000000000003, + "loss": 0.6057, + "step": 51 + }, + { + "epoch": 0.1, + "learning_rate": 0.00020800000000000001, + "loss": 0.601, + "step": 52 + }, + { + "epoch": 0.1, + "learning_rate": 0.00021200000000000003, + "loss": 0.5566, + "step": 53 + }, + { + "epoch": 0.1, + "learning_rate": 0.00021600000000000002, + "loss": 0.5911, + "step": 54 + }, + { + "epoch": 0.11, + "learning_rate": 0.00022000000000000003, + "loss": 0.7636, + "step": 55 + }, + { + "epoch": 0.11, + "learning_rate": 0.00022400000000000002, + "loss": 0.5537, + "step": 56 + }, + { + "epoch": 0.11, + "learning_rate": 0.00022799999999999999, + "loss": 0.6037, + "step": 57 + }, + { + "epoch": 0.11, + "learning_rate": 0.000232, + "loss": 0.6474, + "step": 58 + }, + { + "epoch": 0.11, + "learning_rate": 0.000236, + "loss": 0.6483, + "step": 59 + }, + { + "epoch": 0.12, + "learning_rate": 0.00024, + "loss": 0.5021, + "step": 60 + }, + { + "epoch": 0.12, + "learning_rate": 0.000244, + "loss": 0.5347, + "step": 61 + }, + { + "epoch": 0.12, + "learning_rate": 0.000248, + "loss": 0.5791, + "step": 62 + }, + { + "epoch": 0.12, + "learning_rate": 0.000252, + "loss": 0.5407, + "step": 63 + }, + { + "epoch": 0.12, + "learning_rate": 0.00025600000000000004, + "loss": 0.5298, + "step": 64 + }, + { + "epoch": 0.13, + "learning_rate": 0.00026000000000000003, + "loss": 0.5685, + "step": 65 + }, + { + "epoch": 0.13, + "learning_rate": 0.000264, + "loss": 0.5108, + "step": 66 + }, + { + "epoch": 0.13, + "learning_rate": 0.000268, + "loss": 0.526, + "step": 67 + }, + { + "epoch": 0.13, + "learning_rate": 0.00027200000000000005, + "loss": 0.6843, + "step": 68 + }, + { + "epoch": 0.13, + "learning_rate": 0.000276, + "loss": 0.6608, + "step": 69 + }, + { + "epoch": 0.13, + "learning_rate": 0.00028, + "loss": 0.5866, + "step": 70 + }, + { + "epoch": 0.14, + "learning_rate": 0.000284, + "loss": 0.6422, + "step": 71 + }, + { + "epoch": 0.14, + "learning_rate": 0.000288, + "loss": 0.449, + "step": 72 + }, + { + "epoch": 0.14, + "learning_rate": 0.000292, + "loss": 0.5319, + "step": 73 + }, + { + "epoch": 0.14, + "learning_rate": 0.000296, + "loss": 0.5977, + "step": 74 + }, + { + "epoch": 0.14, + "learning_rate": 0.00030000000000000003, + "loss": 0.5805, + "step": 75 + }, + { + "epoch": 0.15, + "learning_rate": 0.000304, + "loss": 0.5209, + "step": 76 + }, + { + "epoch": 0.15, + "learning_rate": 0.000308, + "loss": 0.6098, + "step": 77 + }, + { + "epoch": 0.15, + "learning_rate": 0.00031200000000000005, + "loss": 0.4665, + "step": 78 + }, + { + "epoch": 0.15, + "learning_rate": 0.00031600000000000004, + "loss": 0.6882, + "step": 79 + }, + { + "epoch": 0.15, + "learning_rate": 0.00032, + "loss": 0.5427, + "step": 80 + }, + { + "epoch": 0.16, + "learning_rate": 0.000324, + "loss": 0.5345, + "step": 81 + }, + { + "epoch": 0.16, + "learning_rate": 0.000328, + "loss": 0.663, + "step": 82 + }, + { + "epoch": 0.16, + "learning_rate": 0.000332, + "loss": 0.5393, + "step": 83 + }, + { + "epoch": 0.16, + "learning_rate": 0.000336, + "loss": 0.5711, + "step": 84 + }, + { + "epoch": 0.16, + "learning_rate": 0.00034, + "loss": 0.5261, + "step": 85 + }, + { + "epoch": 0.17, + "learning_rate": 0.000344, + "loss": 0.5775, + "step": 86 + }, + { + "epoch": 0.17, + "learning_rate": 0.000348, + "loss": 0.6329, + "step": 87 + }, + { + "epoch": 0.17, + "learning_rate": 0.00035200000000000005, + "loss": 0.4425, + "step": 88 + }, + { + "epoch": 0.17, + "learning_rate": 0.00035600000000000003, + "loss": 0.6837, + "step": 89 + }, + { + "epoch": 0.17, + "learning_rate": 0.00036, + "loss": 0.615, + "step": 90 + }, + { + "epoch": 0.18, + "learning_rate": 0.000364, + "loss": 0.5615, + "step": 91 + }, + { + "epoch": 0.18, + "learning_rate": 0.00036800000000000005, + "loss": 0.5434, + "step": 92 + }, + { + "epoch": 0.18, + "learning_rate": 0.00037200000000000004, + "loss": 0.5864, + "step": 93 + }, + { + "epoch": 0.18, + "learning_rate": 0.000376, + "loss": 0.5583, + "step": 94 + }, + { + "epoch": 0.18, + "learning_rate": 0.00038, + "loss": 0.5299, + "step": 95 + }, + { + "epoch": 0.18, + "learning_rate": 0.000384, + "loss": 0.532, + "step": 96 + }, + { + "epoch": 0.19, + "learning_rate": 0.000388, + "loss": 0.5227, + "step": 97 + }, + { + "epoch": 0.19, + "learning_rate": 0.000392, + "loss": 0.5275, + "step": 98 + }, + { + "epoch": 0.19, + "learning_rate": 0.00039600000000000003, + "loss": 0.4541, + "step": 99 + }, + { + "epoch": 0.19, + "learning_rate": 0.0004, + "loss": 0.6485, + "step": 100 + }, + { + "epoch": 0.19, + "learning_rate": 0.0003999995350775973, + "loss": 0.5438, + "step": 101 + }, + { + "epoch": 0.2, + "learning_rate": 0.00039999814031255063, + "loss": 0.5997, + "step": 102 + }, + { + "epoch": 0.2, + "learning_rate": 0.00039999581571134455, + "loss": 0.5322, + "step": 103 + }, + { + "epoch": 0.2, + "learning_rate": 0.0003999925612847867, + "loss": 0.484, + "step": 104 + }, + { + "epoch": 0.2, + "learning_rate": 0.00039998837704800766, + "loss": 0.5961, + "step": 105 + }, + { + "epoch": 0.2, + "learning_rate": 0.00039998326302046085, + "loss": 0.7405, + "step": 106 + }, + { + "epoch": 0.21, + "learning_rate": 0.00039997721922592255, + "loss": 0.5802, + "step": 107 + }, + { + "epoch": 0.21, + "learning_rate": 0.00039997024569249167, + "loss": 0.769, + "step": 108 + }, + { + "epoch": 0.21, + "learning_rate": 0.0003999623424525898, + "loss": 0.5598, + "step": 109 + }, + { + "epoch": 0.21, + "learning_rate": 0.0003999535095429608, + "loss": 0.6143, + "step": 110 + }, + { + "epoch": 0.21, + "learning_rate": 0.00039994374700467095, + "loss": 0.5766, + "step": 111 + }, + { + "epoch": 0.22, + "learning_rate": 0.00039993305488310836, + "loss": 0.7695, + "step": 112 + }, + { + "epoch": 0.22, + "learning_rate": 0.0003999214332279831, + "loss": 0.7153, + "step": 113 + }, + { + "epoch": 0.22, + "learning_rate": 0.0003999088820933269, + "loss": 0.5835, + "step": 114 + }, + { + "epoch": 0.22, + "learning_rate": 0.00039989540153749286, + "loss": 0.6634, + "step": 115 + }, + { + "epoch": 0.22, + "learning_rate": 0.000399880991623155, + "loss": 0.6069, + "step": 116 + }, + { + "epoch": 0.23, + "learning_rate": 0.0003998656524173082, + "loss": 0.7224, + "step": 117 + }, + { + "epoch": 0.23, + "learning_rate": 0.000399849383991268, + "loss": 0.5884, + "step": 118 + }, + { + "epoch": 0.23, + "learning_rate": 0.0003998321864206699, + "loss": 0.5122, + "step": 119 + }, + { + "epoch": 0.23, + "learning_rate": 0.00039981405978546924, + "loss": 0.6453, + "step": 120 + }, + { + "epoch": 0.23, + "learning_rate": 0.0003997950041699408, + "loss": 0.4665, + "step": 121 + }, + { + "epoch": 0.23, + "learning_rate": 0.0003997750196626785, + "loss": 0.5428, + "step": 122 + }, + { + "epoch": 0.24, + "learning_rate": 0.00039975410635659464, + "loss": 0.4365, + "step": 123 + }, + { + "epoch": 0.24, + "learning_rate": 0.00039973226434891995, + "loss": 0.5978, + "step": 124 + }, + { + "epoch": 0.24, + "learning_rate": 0.00039970949374120286, + "loss": 0.7729, + "step": 125 + }, + { + "epoch": 0.24, + "learning_rate": 0.000399685794639309, + "loss": 0.6212, + "step": 126 + }, + { + "epoch": 0.24, + "learning_rate": 0.00039966116715342066, + "loss": 0.5426, + "step": 127 + }, + { + "epoch": 0.25, + "learning_rate": 0.00039963561139803676, + "loss": 0.5782, + "step": 128 + }, + { + "epoch": 0.25, + "learning_rate": 0.0003996091274919716, + "loss": 0.6701, + "step": 129 + }, + { + "epoch": 0.25, + "learning_rate": 0.0003995817155583548, + "loss": 0.6314, + "step": 130 + }, + { + "epoch": 0.25, + "learning_rate": 0.0003995533757246307, + "loss": 0.6662, + "step": 131 + }, + { + "epoch": 0.25, + "learning_rate": 0.0003995241081225573, + "loss": 0.5192, + "step": 132 + }, + { + "epoch": 0.26, + "learning_rate": 0.0003994939128882065, + "loss": 0.5591, + "step": 133 + }, + { + "epoch": 0.26, + "learning_rate": 0.0003994627901619625, + "loss": 0.5809, + "step": 134 + }, + { + "epoch": 0.26, + "learning_rate": 0.0003994307400885219, + "loss": 0.4871, + "step": 135 + }, + { + "epoch": 0.26, + "learning_rate": 0.0003993977628168928, + "loss": 0.6666, + "step": 136 + }, + { + "epoch": 0.26, + "learning_rate": 0.0003993638585003938, + "loss": 0.6469, + "step": 137 + }, + { + "epoch": 0.27, + "learning_rate": 0.00039932902729665357, + "loss": 0.5727, + "step": 138 + }, + { + "epoch": 0.27, + "learning_rate": 0.00039929326936761036, + "loss": 0.6715, + "step": 139 + }, + { + "epoch": 0.27, + "learning_rate": 0.00039925658487951067, + "loss": 0.5686, + "step": 140 + }, + { + "epoch": 0.27, + "learning_rate": 0.00039921897400290894, + "loss": 0.501, + "step": 141 + }, + { + "epoch": 0.27, + "learning_rate": 0.00039918043691266665, + "loss": 0.5795, + "step": 142 + }, + { + "epoch": 0.28, + "learning_rate": 0.00039914097378795124, + "loss": 0.6287, + "step": 143 + }, + { + "epoch": 0.28, + "learning_rate": 0.00039910058481223564, + "loss": 0.7016, + "step": 144 + }, + { + "epoch": 0.28, + "learning_rate": 0.00039905927017329726, + "loss": 0.6232, + "step": 145 + }, + { + "epoch": 0.28, + "learning_rate": 0.00039901703006321715, + "loss": 0.5291, + "step": 146 + }, + { + "epoch": 0.28, + "learning_rate": 0.00039897386467837903, + "loss": 0.5297, + "step": 147 + }, + { + "epoch": 0.28, + "learning_rate": 0.00039892977421946844, + "loss": 0.5784, + "step": 148 + }, + { + "epoch": 0.29, + "learning_rate": 0.0003988847588914718, + "loss": 0.5714, + "step": 149 + }, + { + "epoch": 0.29, + "learning_rate": 0.0003988388189036754, + "loss": 0.5044, + "step": 150 + }, + { + "epoch": 0.29, + "learning_rate": 0.0003987919544696646, + "loss": 0.8246, + "step": 151 + }, + { + "epoch": 0.29, + "learning_rate": 0.0003987441658073226, + "loss": 0.5048, + "step": 152 + }, + { + "epoch": 0.29, + "learning_rate": 0.0003986954531388297, + "loss": 0.5433, + "step": 153 + }, + { + "epoch": 0.3, + "learning_rate": 0.00039864581669066186, + "loss": 0.5251, + "step": 154 + }, + { + "epoch": 0.3, + "learning_rate": 0.0003985952566935902, + "loss": 0.5708, + "step": 155 + }, + { + "epoch": 0.3, + "learning_rate": 0.00039854377338267936, + "loss": 0.6276, + "step": 156 + }, + { + "epoch": 0.3, + "learning_rate": 0.00039849136699728684, + "loss": 0.4915, + "step": 157 + }, + { + "epoch": 0.3, + "learning_rate": 0.0003984380377810617, + "loss": 0.6389, + "step": 158 + }, + { + "epoch": 0.31, + "learning_rate": 0.00039838378598194325, + "loss": 0.6067, + "step": 159 + }, + { + "epoch": 0.31, + "learning_rate": 0.00039832861185216045, + "loss": 0.6136, + "step": 160 + }, + { + "epoch": 0.31, + "learning_rate": 0.0003982725156482301, + "loss": 0.5597, + "step": 161 + }, + { + "epoch": 0.31, + "learning_rate": 0.000398215497630956, + "loss": 0.5957, + "step": 162 + }, + { + "epoch": 0.31, + "learning_rate": 0.0003981575580654278, + "loss": 0.5853, + "step": 163 + }, + { + "epoch": 0.32, + "learning_rate": 0.0003980986972210194, + "loss": 0.5462, + "step": 164 + }, + { + "epoch": 0.32, + "learning_rate": 0.0003980389153713881, + "loss": 0.5302, + "step": 165 + }, + { + "epoch": 0.32, + "learning_rate": 0.00039797821279447307, + "loss": 0.5395, + "step": 166 + }, + { + "epoch": 0.32, + "learning_rate": 0.00039791658977249425, + "loss": 0.7004, + "step": 167 + }, + { + "epoch": 0.32, + "learning_rate": 0.00039785404659195084, + "loss": 0.5622, + "step": 168 + }, + { + "epoch": 0.33, + "learning_rate": 0.00039779058354362013, + "loss": 0.5759, + "step": 169 + }, + { + "epoch": 0.33, + "learning_rate": 0.000397726200922556, + "loss": 0.6184, + "step": 170 + }, + { + "epoch": 0.33, + "learning_rate": 0.0003976608990280877, + "loss": 0.5488, + "step": 171 + }, + { + "epoch": 0.33, + "learning_rate": 0.0003975946781638183, + "loss": 0.6162, + "step": 172 + }, + { + "epoch": 0.33, + "learning_rate": 0.0003975275386376236, + "loss": 0.558, + "step": 173 + }, + { + "epoch": 0.34, + "learning_rate": 0.0003974594807616502, + "loss": 0.519, + "step": 174 + }, + { + "epoch": 0.34, + "learning_rate": 0.0003973905048523144, + "loss": 0.6195, + "step": 175 + }, + { + "epoch": 0.34, + "learning_rate": 0.00039732061123030064, + "loss": 0.5991, + "step": 176 + }, + { + "epoch": 0.34, + "learning_rate": 0.0003972498002205601, + "loss": 0.5428, + "step": 177 + }, + { + "epoch": 0.34, + "learning_rate": 0.00039717807215230896, + "loss": 0.5323, + "step": 178 + }, + { + "epoch": 0.34, + "learning_rate": 0.00039710542735902705, + "loss": 0.5307, + "step": 179 + }, + { + "epoch": 0.35, + "learning_rate": 0.0003970318661784564, + "loss": 0.5783, + "step": 180 + }, + { + "epoch": 0.35, + "learning_rate": 0.0003969573889525993, + "loss": 0.5924, + "step": 181 + }, + { + "epoch": 0.35, + "learning_rate": 0.00039688199602771714, + "loss": 0.5902, + "step": 182 + }, + { + "epoch": 0.35, + "learning_rate": 0.00039680568775432855, + "loss": 0.6291, + "step": 183 + }, + { + "epoch": 0.35, + "learning_rate": 0.0003967284644872077, + "loss": 0.5942, + "step": 184 + }, + { + "epoch": 0.36, + "learning_rate": 0.0003966503265853829, + "loss": 0.4878, + "step": 185 + }, + { + "epoch": 0.36, + "learning_rate": 0.0003965712744121347, + "loss": 0.6487, + "step": 186 + }, + { + "epoch": 0.36, + "learning_rate": 0.0003964913083349945, + "loss": 0.6111, + "step": 187 + }, + { + "epoch": 0.36, + "learning_rate": 0.00039641042872574233, + "loss": 0.6072, + "step": 188 + }, + { + "epoch": 0.36, + "learning_rate": 0.00039632863596040575, + "loss": 0.716, + "step": 189 + }, + { + "epoch": 0.37, + "learning_rate": 0.00039624593041925763, + "loss": 0.6178, + "step": 190 + }, + { + "epoch": 0.37, + "learning_rate": 0.0003961623124868145, + "loss": 0.6323, + "step": 191 + }, + { + "epoch": 0.37, + "learning_rate": 0.00039607778255183485, + "loss": 0.5821, + "step": 192 + }, + { + "epoch": 0.37, + "learning_rate": 0.0003959923410073174, + "loss": 0.6738, + "step": 193 + }, + { + "epoch": 0.37, + "learning_rate": 0.0003959059882504989, + "loss": 0.6203, + "step": 194 + }, + { + "epoch": 0.38, + "learning_rate": 0.00039581872468285277, + "loss": 0.632, + "step": 195 + }, + { + "epoch": 0.38, + "learning_rate": 0.0003957305507100868, + "loss": 0.5857, + "step": 196 + }, + { + "epoch": 0.38, + "learning_rate": 0.00039564146674214164, + "loss": 0.6311, + "step": 197 + }, + { + "epoch": 0.38, + "learning_rate": 0.0003955514731931885, + "loss": 0.5889, + "step": 198 + }, + { + "epoch": 0.38, + "learning_rate": 0.00039546057048162763, + "loss": 0.5201, + "step": 199 + }, + { + "epoch": 0.39, + "learning_rate": 0.00039536875903008607, + "loss": 0.5581, + "step": 200 + }, + { + "epoch": 0.39, + "learning_rate": 0.00039527603926541586, + "loss": 0.5104, + "step": 201 + }, + { + "epoch": 0.39, + "learning_rate": 0.00039518241161869193, + "loss": 0.5978, + "step": 202 + }, + { + "epoch": 0.39, + "learning_rate": 0.00039508787652521013, + "loss": 0.6244, + "step": 203 + }, + { + "epoch": 0.39, + "learning_rate": 0.00039499243442448536, + "loss": 0.589, + "step": 204 + }, + { + "epoch": 0.39, + "learning_rate": 0.0003948960857602493, + "loss": 0.575, + "step": 205 + }, + { + "epoch": 0.4, + "learning_rate": 0.0003947988309804485, + "loss": 0.5494, + "step": 206 + }, + { + "epoch": 0.4, + "learning_rate": 0.0003947006705372422, + "loss": 0.4895, + "step": 207 + }, + { + "epoch": 0.4, + "learning_rate": 0.00039460160488700036, + "loss": 0.5479, + "step": 208 + }, + { + "epoch": 0.4, + "learning_rate": 0.00039450163449030124, + "loss": 0.5893, + "step": 209 + }, + { + "epoch": 0.4, + "learning_rate": 0.0003944007598119297, + "loss": 0.5451, + "step": 210 + }, + { + "epoch": 0.41, + "learning_rate": 0.0003942989813208747, + "loss": 0.5582, + "step": 211 + }, + { + "epoch": 0.41, + "learning_rate": 0.0003941962994903273, + "loss": 0.5121, + "step": 212 + }, + { + "epoch": 0.41, + "learning_rate": 0.00039409271479767826, + "loss": 0.6324, + "step": 213 + }, + { + "epoch": 0.41, + "learning_rate": 0.000393988227724516, + "loss": 0.6118, + "step": 214 + }, + { + "epoch": 0.41, + "learning_rate": 0.0003938828387566244, + "loss": 0.6303, + "step": 215 + }, + { + "epoch": 0.42, + "learning_rate": 0.0003937765483839804, + "loss": 0.7705, + "step": 216 + }, + { + "epoch": 0.42, + "learning_rate": 0.0003936693571007517, + "loss": 0.6224, + "step": 217 + }, + { + "epoch": 0.42, + "learning_rate": 0.0003935612654052946, + "loss": 0.5664, + "step": 218 + }, + { + "epoch": 0.42, + "learning_rate": 0.00039345227380015163, + "loss": 0.66, + "step": 219 + }, + { + "epoch": 0.42, + "learning_rate": 0.00039334238279204906, + "loss": 0.5582, + "step": 220 + }, + { + "epoch": 0.43, + "learning_rate": 0.00039323159289189505, + "loss": 0.6087, + "step": 221 + }, + { + "epoch": 0.43, + "learning_rate": 0.0003931199046147764, + "loss": 0.5566, + "step": 222 + }, + { + "epoch": 0.43, + "learning_rate": 0.00039300731847995716, + "loss": 0.5775, + "step": 223 + }, + { + "epoch": 0.43, + "learning_rate": 0.00039289383501087534, + "loss": 0.5081, + "step": 224 + }, + { + "epoch": 0.43, + "learning_rate": 0.00039277945473514104, + "loss": 0.5218, + "step": 225 + }, + { + "epoch": 0.44, + "learning_rate": 0.0003926641781845338, + "loss": 0.6655, + "step": 226 + }, + { + "epoch": 0.44, + "learning_rate": 0.0003925480058950002, + "loss": 0.5735, + "step": 227 + }, + { + "epoch": 0.44, + "learning_rate": 0.00039243093840665114, + "loss": 0.6609, + "step": 228 + }, + { + "epoch": 0.44, + "learning_rate": 0.0003923129762637596, + "loss": 0.7323, + "step": 229 + }, + { + "epoch": 0.44, + "learning_rate": 0.000392194120014758, + "loss": 0.5703, + "step": 230 + }, + { + "epoch": 0.44, + "learning_rate": 0.00039207437021223583, + "loss": 0.6545, + "step": 231 + }, + { + "epoch": 0.45, + "learning_rate": 0.0003919537274129366, + "loss": 0.521, + "step": 232 + }, + { + "epoch": 0.45, + "learning_rate": 0.00039183219217775564, + "loss": 0.5257, + "step": 233 + }, + { + "epoch": 0.45, + "learning_rate": 0.0003917097650717377, + "loss": 0.5487, + "step": 234 + }, + { + "epoch": 0.45, + "learning_rate": 0.00039158644666407365, + "loss": 0.4861, + "step": 235 + }, + { + "epoch": 0.45, + "learning_rate": 0.00039146223752809845, + "loss": 0.4928, + "step": 236 + }, + { + "epoch": 0.46, + "learning_rate": 0.0003913371382412883, + "loss": 0.5253, + "step": 237 + }, + { + "epoch": 0.46, + "learning_rate": 0.00039121114938525756, + "loss": 0.6155, + "step": 238 + }, + { + "epoch": 0.46, + "learning_rate": 0.00039108427154575684, + "loss": 0.55, + "step": 239 + }, + { + "epoch": 0.46, + "learning_rate": 0.00039095650531266967, + "loss": 0.6617, + "step": 240 + }, + { + "epoch": 0.46, + "learning_rate": 0.00039082785128000976, + "loss": 0.5198, + "step": 241 + }, + { + "epoch": 0.47, + "learning_rate": 0.00039069831004591866, + "loss": 0.5302, + "step": 242 + }, + { + "epoch": 0.47, + "learning_rate": 0.0003905678822126625, + "loss": 0.5347, + "step": 243 + }, + { + "epoch": 0.47, + "learning_rate": 0.00039043656838662946, + "loss": 0.531, + "step": 244 + }, + { + "epoch": 0.47, + "learning_rate": 0.00039030436917832697, + "loss": 0.4884, + "step": 245 + }, + { + "epoch": 0.47, + "learning_rate": 0.00039017128520237883, + "loss": 0.6027, + "step": 246 + }, + { + "epoch": 0.48, + "learning_rate": 0.0003900373170775222, + "loss": 0.5537, + "step": 247 + }, + { + "epoch": 0.48, + "learning_rate": 0.00038990246542660494, + "loss": 0.5753, + "step": 248 + }, + { + "epoch": 0.48, + "learning_rate": 0.00038976673087658256, + "loss": 0.5059, + "step": 249 + }, + { + "epoch": 0.48, + "learning_rate": 0.00038963011405851537, + "loss": 0.5118, + "step": 250 + }, + { + "epoch": 0.48, + "learning_rate": 0.00038949261560756565, + "loss": 0.5645, + "step": 251 + }, + { + "epoch": 0.49, + "learning_rate": 0.0003893542361629944, + "loss": 0.5623, + "step": 252 + }, + { + "epoch": 0.49, + "learning_rate": 0.00038921497636815866, + "loss": 0.5216, + "step": 253 + }, + { + "epoch": 0.49, + "learning_rate": 0.0003890748368705085, + "loss": 0.4501, + "step": 254 + }, + { + "epoch": 0.49, + "learning_rate": 0.0003889338183215838, + "loss": 0.48, + "step": 255 + }, + { + "epoch": 0.49, + "learning_rate": 0.00038879192137701135, + "loss": 0.5218, + "step": 256 + }, + { + "epoch": 0.49, + "learning_rate": 0.0003886491466965018, + "loss": 0.5858, + "step": 257 + }, + { + "epoch": 0.5, + "learning_rate": 0.00038850549494384685, + "loss": 0.6124, + "step": 258 + }, + { + "epoch": 0.5, + "learning_rate": 0.00038836096678691536, + "loss": 0.4645, + "step": 259 + }, + { + "epoch": 0.5, + "learning_rate": 0.00038821556289765136, + "loss": 0.474, + "step": 260 + }, + { + "epoch": 0.5, + "learning_rate": 0.00038806928395207003, + "loss": 0.4364, + "step": 261 + }, + { + "epoch": 0.5, + "learning_rate": 0.00038792213063025484, + "loss": 0.5821, + "step": 262 + }, + { + "epoch": 0.51, + "learning_rate": 0.0003877741036163547, + "loss": 0.5393, + "step": 263 + }, + { + "epoch": 0.51, + "learning_rate": 0.0003876252035985804, + "loss": 0.5373, + "step": 264 + }, + { + "epoch": 0.51, + "learning_rate": 0.0003874754312692013, + "loss": 0.6021, + "step": 265 + }, + { + "epoch": 0.51, + "learning_rate": 0.0003873247873245426, + "loss": 0.4549, + "step": 266 + }, + { + "epoch": 0.51, + "learning_rate": 0.0003871732724649817, + "loss": 0.5994, + "step": 267 + }, + { + "epoch": 0.52, + "learning_rate": 0.0003870208873949453, + "loss": 0.4764, + "step": 268 + }, + { + "epoch": 0.52, + "learning_rate": 0.00038686763282290556, + "loss": 0.4311, + "step": 269 + }, + { + "epoch": 0.52, + "learning_rate": 0.0003867135094613774, + "loss": 0.5462, + "step": 270 + }, + { + "epoch": 0.52, + "learning_rate": 0.0003865585180269148, + "loss": 0.5006, + "step": 271 + }, + { + "epoch": 0.52, + "learning_rate": 0.0003864026592401076, + "loss": 0.5347, + "step": 272 + }, + { + "epoch": 0.53, + "learning_rate": 0.00038624593382557835, + "loss": 0.5242, + "step": 273 + }, + { + "epoch": 0.53, + "learning_rate": 0.00038608834251197856, + "loss": 0.5005, + "step": 274 + }, + { + "epoch": 0.53, + "learning_rate": 0.00038592988603198554, + "loss": 0.5436, + "step": 275 + }, + { + "epoch": 0.53, + "learning_rate": 0.000385770565122299, + "loss": 0.4658, + "step": 276 + }, + { + "epoch": 0.53, + "learning_rate": 0.0003856103805236375, + "loss": 0.5273, + "step": 277 + }, + { + "epoch": 0.54, + "learning_rate": 0.00038544933298073516, + "loss": 0.436, + "step": 278 + }, + { + "epoch": 0.54, + "learning_rate": 0.00038528742324233804, + "loss": 0.4785, + "step": 279 + }, + { + "epoch": 0.54, + "learning_rate": 0.00038512465206120086, + "loss": 0.5366, + "step": 280 + }, + { + "epoch": 0.54, + "learning_rate": 0.00038496102019408324, + "loss": 0.4448, + "step": 281 + }, + { + "epoch": 0.54, + "learning_rate": 0.00038479652840174637, + "loss": 0.5132, + "step": 282 + }, + { + "epoch": 0.54, + "learning_rate": 0.00038463117744894955, + "loss": 0.7918, + "step": 283 + }, + { + "epoch": 0.55, + "learning_rate": 0.00038446496810444627, + "loss": 0.5309, + "step": 284 + }, + { + "epoch": 0.55, + "learning_rate": 0.00038429790114098114, + "loss": 0.5316, + "step": 285 + }, + { + "epoch": 0.55, + "learning_rate": 0.00038412997733528576, + "loss": 0.4611, + "step": 286 + }, + { + "epoch": 0.55, + "learning_rate": 0.00038396119746807563, + "loss": 0.4609, + "step": 287 + }, + { + "epoch": 0.55, + "learning_rate": 0.00038379156232404613, + "loss": 0.5821, + "step": 288 + }, + { + "epoch": 0.56, + "learning_rate": 0.0003836210726918691, + "loss": 0.5883, + "step": 289 + }, + { + "epoch": 0.56, + "learning_rate": 0.0003834497293641889, + "loss": 0.5012, + "step": 290 + }, + { + "epoch": 0.56, + "learning_rate": 0.00038327753313761913, + "loss": 0.4457, + "step": 291 + }, + { + "epoch": 0.56, + "learning_rate": 0.00038310448481273867, + "loss": 0.4851, + "step": 292 + }, + { + "epoch": 0.56, + "learning_rate": 0.00038293058519408787, + "loss": 0.5622, + "step": 293 + }, + { + "epoch": 0.57, + "learning_rate": 0.00038275583509016507, + "loss": 0.5703, + "step": 294 + }, + { + "epoch": 0.57, + "learning_rate": 0.00038258023531342265, + "loss": 0.5718, + "step": 295 + }, + { + "epoch": 0.57, + "learning_rate": 0.0003824037866802632, + "loss": 0.5183, + "step": 296 + }, + { + "epoch": 0.57, + "learning_rate": 0.00038222649001103614, + "loss": 0.5085, + "step": 297 + }, + { + "epoch": 0.57, + "learning_rate": 0.00038204834613003323, + "loss": 0.5388, + "step": 298 + }, + { + "epoch": 0.58, + "learning_rate": 0.00038186935586548537, + "loss": 0.5425, + "step": 299 + }, + { + "epoch": 0.58, + "learning_rate": 0.0003816895200495584, + "loss": 0.447, + "step": 300 + }, + { + "epoch": 0.58, + "learning_rate": 0.0003815088395183493, + "loss": 0.5541, + "step": 301 + }, + { + "epoch": 0.58, + "learning_rate": 0.00038132731511188227, + "loss": 0.5518, + "step": 302 + }, + { + "epoch": 0.58, + "learning_rate": 0.000381144947674105, + "loss": 0.5074, + "step": 303 + }, + { + "epoch": 0.59, + "learning_rate": 0.0003809617380528847, + "loss": 0.5134, + "step": 304 + }, + { + "epoch": 0.59, + "learning_rate": 0.0003807776871000037, + "loss": 0.4599, + "step": 305 + }, + { + "epoch": 0.59, + "learning_rate": 0.0003805927956711562, + "loss": 0.5838, + "step": 306 + }, + { + "epoch": 0.59, + "learning_rate": 0.00038040706462594395, + "loss": 0.5216, + "step": 307 + }, + { + "epoch": 0.59, + "learning_rate": 0.00038022049482787216, + "loss": 0.5323, + "step": 308 + }, + { + "epoch": 0.6, + "learning_rate": 0.0003800330871443456, + "loss": 0.5681, + "step": 309 + }, + { + "epoch": 0.6, + "learning_rate": 0.00037984484244666446, + "loss": 0.4172, + "step": 310 + }, + { + "epoch": 0.6, + "learning_rate": 0.0003796557616100207, + "loss": 0.4958, + "step": 311 + }, + { + "epoch": 0.6, + "learning_rate": 0.0003794658455134934, + "loss": 0.662, + "step": 312 + }, + { + "epoch": 0.6, + "learning_rate": 0.0003792750950400451, + "loss": 0.5832, + "step": 313 + }, + { + "epoch": 0.6, + "learning_rate": 0.0003790835110765174, + "loss": 0.4271, + "step": 314 + }, + { + "epoch": 0.61, + "learning_rate": 0.0003788910945136271, + "loss": 0.4842, + "step": 315 + }, + { + "epoch": 0.61, + "learning_rate": 0.00037869784624596186, + "loss": 0.4656, + "step": 316 + }, + { + "epoch": 0.61, + "learning_rate": 0.00037850376717197626, + "loss": 0.4981, + "step": 317 + }, + { + "epoch": 0.61, + "learning_rate": 0.00037830885819398733, + "loss": 0.5162, + "step": 318 + }, + { + "epoch": 0.61, + "learning_rate": 0.00037811312021817067, + "loss": 0.652, + "step": 319 + }, + { + "epoch": 0.62, + "learning_rate": 0.0003779165541545558, + "loss": 0.5104, + "step": 320 + }, + { + "epoch": 0.62, + "learning_rate": 0.0003777191609170225, + "loss": 0.4971, + "step": 321 + }, + { + "epoch": 0.62, + "learning_rate": 0.0003775209414232962, + "loss": 0.4871, + "step": 322 + }, + { + "epoch": 0.62, + "learning_rate": 0.0003773218965949436, + "loss": 0.5226, + "step": 323 + }, + { + "epoch": 0.62, + "learning_rate": 0.00037712202735736884, + "loss": 0.4823, + "step": 324 + }, + { + "epoch": 0.63, + "learning_rate": 0.0003769213346398087, + "loss": 0.497, + "step": 325 + }, + { + "epoch": 0.63, + "learning_rate": 0.0003767198193753286, + "loss": 0.5976, + "step": 326 + }, + { + "epoch": 0.63, + "learning_rate": 0.0003765174825008181, + "loss": 0.4532, + "step": 327 + }, + { + "epoch": 0.63, + "learning_rate": 0.0003763143249569868, + "loss": 0.5236, + "step": 328 + }, + { + "epoch": 0.63, + "learning_rate": 0.00037611034768835947, + "loss": 0.6513, + "step": 329 + }, + { + "epoch": 0.64, + "learning_rate": 0.00037590555164327224, + "loss": 0.5686, + "step": 330 + }, + { + "epoch": 0.64, + "learning_rate": 0.00037569993777386774, + "loss": 0.456, + "step": 331 + }, + { + "epoch": 0.64, + "learning_rate": 0.0003754935070360909, + "loss": 0.5181, + "step": 332 + }, + { + "epoch": 0.64, + "learning_rate": 0.0003752862603896846, + "loss": 0.4765, + "step": 333 + }, + { + "epoch": 0.64, + "learning_rate": 0.00037507819879818477, + "loss": 0.5363, + "step": 334 + }, + { + "epoch": 0.65, + "learning_rate": 0.00037486932322891646, + "loss": 0.4584, + "step": 335 + }, + { + "epoch": 0.65, + "learning_rate": 0.00037465963465298886, + "loss": 0.5428, + "step": 336 + }, + { + "epoch": 0.65, + "learning_rate": 0.0003744491340452913, + "loss": 0.3927, + "step": 337 + }, + { + "epoch": 0.65, + "learning_rate": 0.0003742378223844882, + "loss": 0.5478, + "step": 338 + }, + { + "epoch": 0.65, + "learning_rate": 0.0003740257006530147, + "loss": 0.469, + "step": 339 + }, + { + "epoch": 0.65, + "learning_rate": 0.00037381276983707246, + "loss": 0.5169, + "step": 340 + }, + { + "epoch": 0.66, + "learning_rate": 0.00037359903092662434, + "loss": 0.4797, + "step": 341 + }, + { + "epoch": 0.66, + "learning_rate": 0.00037338448491539054, + "loss": 0.5315, + "step": 342 + }, + { + "epoch": 0.66, + "learning_rate": 0.00037316913280084353, + "loss": 0.4422, + "step": 343 + }, + { + "epoch": 0.66, + "learning_rate": 0.0003729529755842035, + "loss": 0.4426, + "step": 344 + }, + { + "epoch": 0.66, + "learning_rate": 0.0003727360142704337, + "loss": 0.4718, + "step": 345 + }, + { + "epoch": 0.67, + "learning_rate": 0.0003725182498682361, + "loss": 0.5585, + "step": 346 + }, + { + "epoch": 0.67, + "learning_rate": 0.0003722996833900459, + "loss": 0.4775, + "step": 347 + }, + { + "epoch": 0.67, + "learning_rate": 0.0003720803158520279, + "loss": 0.6014, + "step": 348 + }, + { + "epoch": 0.67, + "learning_rate": 0.00037186014827407076, + "loss": 0.5117, + "step": 349 + }, + { + "epoch": 0.67, + "learning_rate": 0.0003716391816797829, + "loss": 0.5404, + "step": 350 + }, + { + "epoch": 0.68, + "learning_rate": 0.0003714174170964876, + "loss": 0.527, + "step": 351 + }, + { + "epoch": 0.68, + "learning_rate": 0.00037119485555521796, + "loss": 0.4555, + "step": 352 + }, + { + "epoch": 0.68, + "learning_rate": 0.00037097149809071255, + "loss": 0.5372, + "step": 353 + }, + { + "epoch": 0.68, + "learning_rate": 0.00037074734574141016, + "loss": 0.5377, + "step": 354 + }, + { + "epoch": 0.68, + "learning_rate": 0.0003705223995494454, + "loss": 0.4925, + "step": 355 + }, + { + "epoch": 0.69, + "learning_rate": 0.00037029666056064345, + "loss": 0.482, + "step": 356 + }, + { + "epoch": 0.69, + "learning_rate": 0.00037007012982451546, + "loss": 0.5235, + "step": 357 + }, + { + "epoch": 0.69, + "learning_rate": 0.00036984280839425356, + "loss": 0.4957, + "step": 358 + }, + { + "epoch": 0.69, + "learning_rate": 0.000369614697326726, + "loss": 0.5379, + "step": 359 + }, + { + "epoch": 0.69, + "learning_rate": 0.0003693857976824721, + "loss": 0.4653, + "step": 360 + }, + { + "epoch": 0.7, + "learning_rate": 0.00036915611052569785, + "loss": 0.469, + "step": 361 + }, + { + "epoch": 0.7, + "learning_rate": 0.0003689256369242702, + "loss": 0.5618, + "step": 362 + }, + { + "epoch": 0.7, + "learning_rate": 0.0003686943779497124, + "loss": 0.4459, + "step": 363 + }, + { + "epoch": 0.7, + "learning_rate": 0.0003684623346771995, + "loss": 0.5606, + "step": 364 + }, + { + "epoch": 0.7, + "learning_rate": 0.0003682295081855524, + "loss": 0.4368, + "step": 365 + }, + { + "epoch": 0.7, + "learning_rate": 0.00036799589955723375, + "loss": 0.4168, + "step": 366 + }, + { + "epoch": 0.71, + "learning_rate": 0.00036776150987834243, + "loss": 0.4664, + "step": 367 + }, + { + "epoch": 0.71, + "learning_rate": 0.00036752634023860846, + "loss": 0.4737, + "step": 368 + }, + { + "epoch": 0.71, + "learning_rate": 0.0003672903917313883, + "loss": 0.4247, + "step": 369 + }, + { + "epoch": 0.71, + "learning_rate": 0.00036705366545365935, + "loss": 0.5677, + "step": 370 + }, + { + "epoch": 0.71, + "learning_rate": 0.00036681616250601505, + "loss": 0.5441, + "step": 371 + }, + { + "epoch": 0.72, + "learning_rate": 0.0003665778839926599, + "loss": 0.6247, + "step": 372 + }, + { + "epoch": 0.72, + "learning_rate": 0.00036633883102140405, + "loss": 0.5217, + "step": 373 + }, + { + "epoch": 0.72, + "learning_rate": 0.0003660990047036584, + "loss": 0.4651, + "step": 374 + }, + { + "epoch": 0.72, + "learning_rate": 0.0003658584061544291, + "loss": 0.4648, + "step": 375 + }, + { + "epoch": 0.72, + "learning_rate": 0.0003656170364923128, + "loss": 0.6048, + "step": 376 + }, + { + "epoch": 0.73, + "learning_rate": 0.00036537489683949114, + "loss": 0.4515, + "step": 377 + }, + { + "epoch": 0.73, + "learning_rate": 0.0003651319883217255, + "loss": 0.5096, + "step": 378 + }, + { + "epoch": 0.73, + "learning_rate": 0.00036488831206835207, + "loss": 0.4231, + "step": 379 + }, + { + "epoch": 0.73, + "learning_rate": 0.00036464386921227637, + "loss": 0.4903, + "step": 380 + }, + { + "epoch": 0.73, + "learning_rate": 0.00036439866088996796, + "loss": 0.5131, + "step": 381 + }, + { + "epoch": 0.74, + "learning_rate": 0.0003641526882414553, + "loss": 0.5986, + "step": 382 + }, + { + "epoch": 0.74, + "learning_rate": 0.0003639059524103203, + "loss": 0.6, + "step": 383 + }, + { + "epoch": 0.74, + "learning_rate": 0.0003636584545436931, + "loss": 0.5216, + "step": 384 + }, + { + "epoch": 0.74, + "learning_rate": 0.0003634101957922468, + "loss": 0.5144, + "step": 385 + }, + { + "epoch": 0.74, + "learning_rate": 0.00036316117731019184, + "loss": 0.4963, + "step": 386 + }, + { + "epoch": 0.75, + "learning_rate": 0.0003629114002552711, + "loss": 0.5657, + "step": 387 + }, + { + "epoch": 0.75, + "learning_rate": 0.00036266086578875384, + "loss": 0.5028, + "step": 388 + }, + { + "epoch": 0.75, + "learning_rate": 0.0003624095750754311, + "loss": 0.573, + "step": 389 + }, + { + "epoch": 0.75, + "learning_rate": 0.00036215752928360967, + "loss": 0.5199, + "step": 390 + }, + { + "epoch": 0.75, + "learning_rate": 0.0003619047295851068, + "loss": 0.656, + "step": 391 + }, + { + "epoch": 0.75, + "learning_rate": 0.00036165117715524506, + "loss": 0.5129, + "step": 392 + }, + { + "epoch": 0.76, + "learning_rate": 0.00036139687317284647, + "loss": 0.3945, + "step": 393 + }, + { + "epoch": 0.76, + "learning_rate": 0.0003611418188202271, + "loss": 0.5318, + "step": 394 + }, + { + "epoch": 0.76, + "learning_rate": 0.00036088601528319196, + "loss": 0.5344, + "step": 395 + }, + { + "epoch": 0.76, + "learning_rate": 0.00036062946375102885, + "loss": 0.5407, + "step": 396 + }, + { + "epoch": 0.76, + "learning_rate": 0.0003603721654165034, + "loss": 0.5364, + "step": 397 + }, + { + "epoch": 0.77, + "learning_rate": 0.00036011412147585306, + "loss": 0.5407, + "step": 398 + }, + { + "epoch": 0.77, + "learning_rate": 0.0003598553331287821, + "loss": 0.5999, + "step": 399 + }, + { + "epoch": 0.77, + "learning_rate": 0.0003595958015784555, + "loss": 0.624, + "step": 400 + }, + { + "epoch": 0.77, + "learning_rate": 0.00035933552803149354, + "loss": 0.5351, + "step": 401 + }, + { + "epoch": 0.77, + "learning_rate": 0.0003590745136979662, + "loss": 0.5196, + "step": 402 + }, + { + "epoch": 0.78, + "learning_rate": 0.00035881275979138765, + "loss": 0.5447, + "step": 403 + }, + { + "epoch": 0.78, + "learning_rate": 0.0003585502675287104, + "loss": 0.4908, + "step": 404 + }, + { + "epoch": 0.78, + "learning_rate": 0.00035828703813031986, + "loss": 0.5172, + "step": 405 + }, + { + "epoch": 0.78, + "learning_rate": 0.00035802307282002834, + "loss": 0.5923, + "step": 406 + }, + { + "epoch": 0.78, + "learning_rate": 0.0003577583728250699, + "loss": 0.568, + "step": 407 + }, + { + "epoch": 0.79, + "learning_rate": 0.00035749293937609395, + "loss": 0.4618, + "step": 408 + }, + { + "epoch": 0.79, + "learning_rate": 0.0003572267737071601, + "loss": 0.5351, + "step": 409 + }, + { + "epoch": 0.79, + "learning_rate": 0.0003569598770557322, + "loss": 0.5285, + "step": 410 + }, + { + "epoch": 0.79, + "learning_rate": 0.00035669225066267256, + "loss": 0.4571, + "step": 411 + }, + { + "epoch": 0.79, + "learning_rate": 0.00035642389577223625, + "loss": 0.4214, + "step": 412 + }, + { + "epoch": 0.8, + "learning_rate": 0.0003561548136320653, + "loss": 0.5393, + "step": 413 + }, + { + "epoch": 0.8, + "learning_rate": 0.0003558850054931828, + "loss": 0.549, + "step": 414 + }, + { + "epoch": 0.8, + "learning_rate": 0.00035561447260998714, + "loss": 0.4824, + "step": 415 + }, + { + "epoch": 0.8, + "learning_rate": 0.00035534321624024656, + "loss": 0.6244, + "step": 416 + }, + { + "epoch": 0.8, + "learning_rate": 0.00035507123764509245, + "loss": 0.5436, + "step": 417 + }, + { + "epoch": 0.8, + "learning_rate": 0.0003547985380890144, + "loss": 0.5198, + "step": 418 + }, + { + "epoch": 0.81, + "learning_rate": 0.00035452511883985366, + "loss": 0.5979, + "step": 419 + }, + { + "epoch": 0.81, + "learning_rate": 0.00035425098116879754, + "loss": 0.4158, + "step": 420 + }, + { + "epoch": 0.81, + "learning_rate": 0.00035397612635037356, + "loss": 0.5125, + "step": 421 + }, + { + "epoch": 0.81, + "learning_rate": 0.00035370055566244334, + "loss": 0.4699, + "step": 422 + }, + { + "epoch": 0.81, + "learning_rate": 0.0003534242703861966, + "loss": 0.5553, + "step": 423 + }, + { + "epoch": 0.82, + "learning_rate": 0.00035314727180614573, + "loss": 0.5969, + "step": 424 + }, + { + "epoch": 0.82, + "learning_rate": 0.00035286956121011897, + "loss": 0.456, + "step": 425 + }, + { + "epoch": 0.82, + "learning_rate": 0.0003525911398892552, + "loss": 0.5195, + "step": 426 + }, + { + "epoch": 0.82, + "learning_rate": 0.0003523120091379975, + "loss": 0.5187, + "step": 427 + }, + { + "epoch": 0.82, + "learning_rate": 0.00035203217025408726, + "loss": 0.5443, + "step": 428 + }, + { + "epoch": 0.83, + "learning_rate": 0.0003517516245385582, + "loss": 0.4476, + "step": 429 + }, + { + "epoch": 0.83, + "learning_rate": 0.0003514703732957301, + "loss": 0.5757, + "step": 430 + }, + { + "epoch": 0.83, + "learning_rate": 0.00035118841783320304, + "loss": 0.5129, + "step": 431 + }, + { + "epoch": 0.83, + "learning_rate": 0.00035090575946185114, + "loss": 0.6354, + "step": 432 + }, + { + "epoch": 0.83, + "learning_rate": 0.00035062239949581645, + "loss": 0.4065, + "step": 433 + }, + { + "epoch": 0.84, + "learning_rate": 0.000350338339252503, + "loss": 0.5472, + "step": 434 + }, + { + "epoch": 0.84, + "learning_rate": 0.00035005358005257045, + "loss": 0.5424, + "step": 435 + }, + { + "epoch": 0.84, + "learning_rate": 0.00034976812321992816, + "loss": 0.6127, + "step": 436 + }, + { + "epoch": 0.84, + "learning_rate": 0.00034948197008172877, + "loss": 0.63, + "step": 437 + }, + { + "epoch": 0.84, + "learning_rate": 0.0003491951219683625, + "loss": 0.413, + "step": 438 + }, + { + "epoch": 0.85, + "learning_rate": 0.00034890758021345034, + "loss": 0.5435, + "step": 439 + }, + { + "epoch": 0.85, + "learning_rate": 0.00034861934615383844, + "loss": 0.5433, + "step": 440 + }, + { + "epoch": 0.85, + "learning_rate": 0.00034833042112959153, + "loss": 0.4763, + "step": 441 + }, + { + "epoch": 0.85, + "learning_rate": 0.00034804080648398667, + "loss": 0.5727, + "step": 442 + }, + { + "epoch": 0.85, + "learning_rate": 0.00034775050356350727, + "loss": 0.5392, + "step": 443 + }, + { + "epoch": 0.85, + "learning_rate": 0.00034745951371783666, + "loss": 0.4981, + "step": 444 + }, + { + "epoch": 0.86, + "learning_rate": 0.0003471678382998518, + "loss": 0.5516, + "step": 445 + }, + { + "epoch": 0.86, + "learning_rate": 0.00034687547866561703, + "loss": 0.4965, + "step": 446 + }, + { + "epoch": 0.86, + "learning_rate": 0.0003465824361743779, + "loss": 0.4982, + "step": 447 + }, + { + "epoch": 0.86, + "learning_rate": 0.0003462887121885544, + "loss": 0.5619, + "step": 448 + }, + { + "epoch": 0.86, + "learning_rate": 0.0003459943080737353, + "loss": 0.5273, + "step": 449 + }, + { + "epoch": 0.87, + "learning_rate": 0.00034569922519867133, + "loss": 0.517, + "step": 450 + }, + { + "epoch": 0.87, + "learning_rate": 0.00034540346493526876, + "loss": 0.4874, + "step": 451 + }, + { + "epoch": 0.87, + "learning_rate": 0.0003451070286585833, + "loss": 0.5966, + "step": 452 + }, + { + "epoch": 0.87, + "learning_rate": 0.0003448099177468137, + "loss": 0.4487, + "step": 453 + }, + { + "epoch": 0.87, + "learning_rate": 0.0003445121335812951, + "loss": 0.5091, + "step": 454 + }, + { + "epoch": 0.88, + "learning_rate": 0.0003442136775464929, + "loss": 0.407, + "step": 455 + }, + { + "epoch": 0.88, + "learning_rate": 0.0003439145510299958, + "loss": 0.6327, + "step": 456 + }, + { + "epoch": 0.88, + "learning_rate": 0.00034361475542251025, + "loss": 0.4217, + "step": 457 + }, + { + "epoch": 0.88, + "learning_rate": 0.0003433142921178531, + "loss": 0.6102, + "step": 458 + }, + { + "epoch": 0.88, + "learning_rate": 0.0003430131625129456, + "loss": 0.5556, + "step": 459 + }, + { + "epoch": 0.89, + "learning_rate": 0.00034271136800780673, + "loss": 0.4986, + "step": 460 + }, + { + "epoch": 0.89, + "learning_rate": 0.0003424089100055467, + "loss": 0.5406, + "step": 461 + }, + { + "epoch": 0.89, + "learning_rate": 0.00034210578991236056, + "loss": 0.5881, + "step": 462 + }, + { + "epoch": 0.89, + "learning_rate": 0.00034180200913752157, + "loss": 0.4869, + "step": 463 + }, + { + "epoch": 0.89, + "learning_rate": 0.00034149756909337454, + "loss": 0.5626, + "step": 464 + }, + { + "epoch": 0.9, + "learning_rate": 0.0003411924711953295, + "loss": 0.564, + "step": 465 + }, + { + "epoch": 0.9, + "learning_rate": 0.00034088671686185486, + "loss": 0.6272, + "step": 466 + }, + { + "epoch": 0.9, + "learning_rate": 0.0003405803075144711, + "loss": 0.4643, + "step": 467 + }, + { + "epoch": 0.9, + "learning_rate": 0.0003402732445777438, + "loss": 0.5435, + "step": 468 + }, + { + "epoch": 0.9, + "learning_rate": 0.00033996552947927744, + "loss": 0.5844, + "step": 469 + }, + { + "epoch": 0.91, + "learning_rate": 0.0003396571636497084, + "loss": 0.5362, + "step": 470 + }, + { + "epoch": 0.91, + "learning_rate": 0.00033934814852269865, + "loss": 0.5607, + "step": 471 + }, + { + "epoch": 0.91, + "learning_rate": 0.0003390384855349285, + "loss": 0.4836, + "step": 472 + }, + { + "epoch": 0.91, + "learning_rate": 0.00033872817612609065, + "loss": 0.6555, + "step": 473 + }, + { + "epoch": 0.91, + "learning_rate": 0.00033841722173888315, + "loss": 0.4784, + "step": 474 + }, + { + "epoch": 0.91, + "learning_rate": 0.00033810562381900253, + "loss": 0.5583, + "step": 475 + }, + { + "epoch": 0.92, + "learning_rate": 0.00033779338381513736, + "loss": 0.4679, + "step": 476 + }, + { + "epoch": 0.92, + "learning_rate": 0.0003374805031789613, + "loss": 0.5325, + "step": 477 + }, + { + "epoch": 0.92, + "learning_rate": 0.00033716698336512654, + "loss": 0.6601, + "step": 478 + }, + { + "epoch": 0.92, + "learning_rate": 0.000336852825831257, + "loss": 0.4838, + "step": 479 + }, + { + "epoch": 0.92, + "learning_rate": 0.0003365380320379414, + "loss": 0.5588, + "step": 480 + }, + { + "epoch": 0.93, + "learning_rate": 0.00033622260344872665, + "loss": 0.4596, + "step": 481 + }, + { + "epoch": 0.93, + "learning_rate": 0.0003359065415301108, + "loss": 0.5228, + "step": 482 + }, + { + "epoch": 0.93, + "learning_rate": 0.00033558984775153663, + "loss": 0.5316, + "step": 483 + }, + { + "epoch": 0.93, + "learning_rate": 0.00033527252358538437, + "loss": 0.4761, + "step": 484 + }, + { + "epoch": 0.93, + "learning_rate": 0.0003349545705069653, + "loss": 0.5254, + "step": 485 + }, + { + "epoch": 0.94, + "learning_rate": 0.0003346359899945144, + "loss": 0.4786, + "step": 486 + }, + { + "epoch": 0.94, + "learning_rate": 0.00033431678352918384, + "loss": 0.4302, + "step": 487 + }, + { + "epoch": 0.94, + "learning_rate": 0.0003339969525950361, + "loss": 0.4914, + "step": 488 + }, + { + "epoch": 0.94, + "learning_rate": 0.00033367649867903663, + "loss": 0.4102, + "step": 489 + }, + { + "epoch": 0.94, + "learning_rate": 0.0003333554232710477, + "loss": 0.4698, + "step": 490 + }, + { + "epoch": 0.95, + "learning_rate": 0.0003330337278638207, + "loss": 0.4454, + "step": 491 + }, + { + "epoch": 0.95, + "learning_rate": 0.00033271141395298964, + "loss": 0.4648, + "step": 492 + }, + { + "epoch": 0.95, + "learning_rate": 0.00033238848303706415, + "loss": 0.4616, + "step": 493 + }, + { + "epoch": 0.95, + "learning_rate": 0.00033206493661742237, + "loss": 0.4861, + "step": 494 + }, + { + "epoch": 0.95, + "learning_rate": 0.00033174077619830416, + "loss": 0.4797, + "step": 495 + }, + { + "epoch": 0.96, + "learning_rate": 0.00033141600328680373, + "loss": 0.5104, + "step": 496 + }, + { + "epoch": 0.96, + "learning_rate": 0.00033109061939286336, + "loss": 0.5712, + "step": 497 + }, + { + "epoch": 0.96, + "learning_rate": 0.00033076462602926553, + "loss": 0.5425, + "step": 498 + }, + { + "epoch": 0.96, + "learning_rate": 0.00033043802471162636, + "loss": 0.6156, + "step": 499 + }, + { + "epoch": 0.96, + "learning_rate": 0.0003301108169583887, + "loss": 0.4282, + "step": 500 + }, + { + "epoch": 0.96, + "learning_rate": 0.0003297830042908146, + "loss": 0.4088, + "step": 501 + }, + { + "epoch": 0.97, + "learning_rate": 0.00032945458823297857, + "loss": 0.4866, + "step": 502 + }, + { + "epoch": 0.97, + "learning_rate": 0.0003291255703117605, + "loss": 0.5045, + "step": 503 + }, + { + "epoch": 0.97, + "learning_rate": 0.0003287959520568384, + "loss": 0.491, + "step": 504 + }, + { + "epoch": 0.97, + "learning_rate": 0.00032846573500068136, + "loss": 0.458, + "step": 505 + }, + { + "epoch": 0.97, + "learning_rate": 0.00032813492067854246, + "loss": 0.4508, + "step": 506 + }, + { + "epoch": 0.98, + "learning_rate": 0.0003278035106284516, + "loss": 0.4294, + "step": 507 + }, + { + "epoch": 0.98, + "learning_rate": 0.00032747150639120834, + "loss": 0.4834, + "step": 508 + }, + { + "epoch": 0.98, + "learning_rate": 0.00032713890951037477, + "loss": 0.3857, + "step": 509 + }, + { + "epoch": 0.98, + "learning_rate": 0.00032680572153226834, + "loss": 0.4072, + "step": 510 + }, + { + "epoch": 0.98, + "learning_rate": 0.0003264719440059545, + "loss": 0.4028, + "step": 511 + }, + { + "epoch": 0.99, + "learning_rate": 0.00032613757848323977, + "loss": 0.3789, + "step": 512 + }, + { + "epoch": 0.99, + "learning_rate": 0.00032580262651866446, + "loss": 0.4944, + "step": 513 + }, + { + "epoch": 0.99, + "learning_rate": 0.0003254670896694952, + "loss": 0.4259, + "step": 514 + }, + { + "epoch": 0.99, + "learning_rate": 0.00032513096949571805, + "loss": 0.5037, + "step": 515 + }, + { + "epoch": 0.99, + "learning_rate": 0.00032479426756003093, + "loss": 0.5857, + "step": 516 + }, + { + "epoch": 1.0, + "learning_rate": 0.0003244569854278366, + "loss": 0.5407, + "step": 517 + }, + { + "epoch": 1.0, + "learning_rate": 0.00032411912466723524, + "loss": 0.499, + "step": 518 + }, + { + "epoch": 1.0, + "learning_rate": 0.0003237806868490172, + "loss": 0.4359, + "step": 519 + }, + { + "epoch": 1.0, + "learning_rate": 0.00032344167354665573, + "loss": 0.4374, + "step": 520 + }, + { + "epoch": 1.0, + "learning_rate": 0.0003231020863362997, + "loss": 0.4172, + "step": 521 + }, + { + "epoch": 1.01, + "learning_rate": 0.000322761926796766, + "loss": 0.4451, + "step": 522 + }, + { + "epoch": 1.01, + "learning_rate": 0.0003224211965095326, + "loss": 0.4, + "step": 523 + }, + { + "epoch": 1.01, + "learning_rate": 0.0003220798970587309, + "loss": 0.4009, + "step": 524 + }, + { + "epoch": 1.01, + "learning_rate": 0.0003217380300311386, + "loss": 0.3966, + "step": 525 + }, + { + "epoch": 1.01, + "learning_rate": 0.000321395597016172, + "loss": 0.4255, + "step": 526 + }, + { + "epoch": 1.01, + "learning_rate": 0.00032105259960587895, + "loss": 0.4707, + "step": 527 + }, + { + "epoch": 1.02, + "learning_rate": 0.00032070903939493124, + "loss": 0.5313, + "step": 528 + }, + { + "epoch": 1.02, + "learning_rate": 0.0003203649179806172, + "loss": 0.3596, + "step": 529 + }, + { + "epoch": 1.02, + "learning_rate": 0.0003200202369628345, + "loss": 0.5223, + "step": 530 + }, + { + "epoch": 1.02, + "learning_rate": 0.00031967499794408234, + "loss": 0.4146, + "step": 531 + }, + { + "epoch": 1.02, + "learning_rate": 0.00031932920252945423, + "loss": 0.4328, + "step": 532 + }, + { + "epoch": 1.03, + "learning_rate": 0.0003189828523266306, + "loss": 0.4258, + "step": 533 + }, + { + "epoch": 1.03, + "learning_rate": 0.00031863594894587105, + "loss": 0.4457, + "step": 534 + }, + { + "epoch": 1.03, + "learning_rate": 0.0003182884940000072, + "loss": 0.5249, + "step": 535 + }, + { + "epoch": 1.03, + "learning_rate": 0.0003179404891044348, + "loss": 0.4751, + "step": 536 + }, + { + "epoch": 1.03, + "learning_rate": 0.00031759193587710676, + "loss": 0.5378, + "step": 537 + }, + { + "epoch": 1.04, + "learning_rate": 0.00031724283593852497, + "loss": 0.634, + "step": 538 + }, + { + "epoch": 1.04, + "learning_rate": 0.00031689319091173326, + "loss": 0.4298, + "step": 539 + }, + { + "epoch": 1.04, + "learning_rate": 0.00031654300242230977, + "loss": 0.5469, + "step": 540 + }, + { + "epoch": 1.04, + "learning_rate": 0.00031619227209835917, + "loss": 0.5153, + "step": 541 + }, + { + "epoch": 1.04, + "learning_rate": 0.0003158410015705053, + "loss": 0.4144, + "step": 542 + }, + { + "epoch": 1.05, + "learning_rate": 0.0003154891924718837, + "loss": 0.6041, + "step": 543 + }, + { + "epoch": 1.05, + "learning_rate": 0.0003151368464381335, + "loss": 0.4891, + "step": 544 + }, + { + "epoch": 1.05, + "learning_rate": 0.0003147839651073904, + "loss": 0.5258, + "step": 545 + }, + { + "epoch": 1.05, + "learning_rate": 0.00031443055012027874, + "loss": 0.4351, + "step": 546 + }, + { + "epoch": 1.05, + "learning_rate": 0.000314076603119904, + "loss": 0.4556, + "step": 547 + }, + { + "epoch": 1.06, + "learning_rate": 0.00031372212575184514, + "loss": 0.5445, + "step": 548 + }, + { + "epoch": 1.06, + "learning_rate": 0.00031336711966414675, + "loss": 0.5585, + "step": 549 + }, + { + "epoch": 1.06, + "learning_rate": 0.0003130115865073117, + "loss": 0.367, + "step": 550 + }, + { + "epoch": 1.06, + "learning_rate": 0.0003126555279342933, + "loss": 0.4877, + "step": 551 + }, + { + "epoch": 1.06, + "learning_rate": 0.0003122989456004876, + "loss": 0.4335, + "step": 552 + }, + { + "epoch": 1.07, + "learning_rate": 0.0003119418411637258, + "loss": 0.4383, + "step": 553 + }, + { + "epoch": 1.07, + "learning_rate": 0.0003115842162842663, + "loss": 0.4508, + "step": 554 + }, + { + "epoch": 1.07, + "learning_rate": 0.00031122607262478743, + "loss": 0.4631, + "step": 555 + }, + { + "epoch": 1.07, + "learning_rate": 0.0003108674118503793, + "loss": 0.3496, + "step": 556 + }, + { + "epoch": 1.07, + "learning_rate": 0.0003105082356285361, + "loss": 0.4108, + "step": 557 + }, + { + "epoch": 1.08, + "learning_rate": 0.0003101485456291486, + "loss": 0.4877, + "step": 558 + }, + { + "epoch": 1.08, + "learning_rate": 0.00030978834352449614, + "loss": 0.3696, + "step": 559 + }, + { + "epoch": 1.08, + "learning_rate": 0.00030942763098923913, + "loss": 0.5138, + "step": 560 + }, + { + "epoch": 1.08, + "learning_rate": 0.00030906640970041084, + "loss": 0.5961, + "step": 561 + }, + { + "epoch": 1.08, + "learning_rate": 0.0003087046813374099, + "loss": 0.3824, + "step": 562 + }, + { + "epoch": 1.08, + "learning_rate": 0.00030834244758199276, + "loss": 0.4925, + "step": 563 + }, + { + "epoch": 1.09, + "learning_rate": 0.000307979710118265, + "loss": 0.4511, + "step": 564 + }, + { + "epoch": 1.09, + "learning_rate": 0.00030761647063267457, + "loss": 0.4306, + "step": 565 + }, + { + "epoch": 1.09, + "learning_rate": 0.0003072527308140031, + "loss": 0.468, + "step": 566 + }, + { + "epoch": 1.09, + "learning_rate": 0.00030688849235335856, + "loss": 0.4842, + "step": 567 + }, + { + "epoch": 1.09, + "learning_rate": 0.0003065237569441671, + "loss": 0.4332, + "step": 568 + }, + { + "epoch": 1.1, + "learning_rate": 0.00030615852628216537, + "loss": 0.4637, + "step": 569 + }, + { + "epoch": 1.1, + "learning_rate": 0.0003057928020653925, + "loss": 0.6193, + "step": 570 + }, + { + "epoch": 1.1, + "learning_rate": 0.0003054265859941824, + "loss": 0.5033, + "step": 571 + }, + { + "epoch": 1.1, + "learning_rate": 0.00030505987977115555, + "loss": 0.4185, + "step": 572 + }, + { + "epoch": 1.1, + "learning_rate": 0.0003046926851012114, + "loss": 0.4211, + "step": 573 + }, + { + "epoch": 1.11, + "learning_rate": 0.0003043250036915201, + "loss": 0.5089, + "step": 574 + }, + { + "epoch": 1.11, + "learning_rate": 0.00030395683725151505, + "loss": 0.517, + "step": 575 + }, + { + "epoch": 1.11, + "learning_rate": 0.0003035881874928845, + "loss": 0.492, + "step": 576 + }, + { + "epoch": 1.11, + "learning_rate": 0.0003032190561295636, + "loss": 0.4535, + "step": 577 + }, + { + "epoch": 1.11, + "learning_rate": 0.0003028494448777269, + "loss": 0.3947, + "step": 578 + }, + { + "epoch": 1.12, + "learning_rate": 0.00030247935545577986, + "loss": 0.3125, + "step": 579 + }, + { + "epoch": 1.12, + "learning_rate": 0.0003021087895843511, + "loss": 0.3882, + "step": 580 + }, + { + "epoch": 1.12, + "learning_rate": 0.0003017377489862845, + "loss": 0.4802, + "step": 581 + }, + { + "epoch": 1.12, + "learning_rate": 0.00030136623538663083, + "loss": 0.4652, + "step": 582 + }, + { + "epoch": 1.12, + "learning_rate": 0.00030099425051263994, + "loss": 0.3816, + "step": 583 + }, + { + "epoch": 1.13, + "learning_rate": 0.0003006217960937529, + "loss": 0.4583, + "step": 584 + }, + { + "epoch": 1.13, + "learning_rate": 0.00030024887386159385, + "loss": 0.4568, + "step": 585 + }, + { + "epoch": 1.13, + "learning_rate": 0.00029987548554996174, + "loss": 0.3908, + "step": 586 + }, + { + "epoch": 1.13, + "learning_rate": 0.0002995016328948225, + "loss": 0.4235, + "step": 587 + }, + { + "epoch": 1.13, + "learning_rate": 0.00029912731763430075, + "loss": 0.4138, + "step": 588 + }, + { + "epoch": 1.13, + "learning_rate": 0.00029875254150867216, + "loss": 0.5838, + "step": 589 + }, + { + "epoch": 1.14, + "learning_rate": 0.0002983773062603548, + "loss": 0.462, + "step": 590 + }, + { + "epoch": 1.14, + "learning_rate": 0.00029800161363390145, + "loss": 0.4632, + "step": 591 + }, + { + "epoch": 1.14, + "learning_rate": 0.00029762546537599125, + "loss": 0.5898, + "step": 592 + }, + { + "epoch": 1.14, + "learning_rate": 0.0002972488632354218, + "loss": 0.4742, + "step": 593 + }, + { + "epoch": 1.14, + "learning_rate": 0.00029687180896310065, + "loss": 0.4579, + "step": 594 + }, + { + "epoch": 1.15, + "learning_rate": 0.0002964943043120378, + "loss": 0.5514, + "step": 595 + }, + { + "epoch": 1.15, + "learning_rate": 0.00029611635103733675, + "loss": 0.4304, + "step": 596 + }, + { + "epoch": 1.15, + "learning_rate": 0.0002957379508961871, + "loss": 0.4383, + "step": 597 + }, + { + "epoch": 1.15, + "learning_rate": 0.00029535910564785584, + "loss": 0.5327, + "step": 598 + }, + { + "epoch": 1.15, + "learning_rate": 0.00029497981705367933, + "loss": 0.4781, + "step": 599 + }, + { + "epoch": 1.16, + "learning_rate": 0.00029460008687705525, + "loss": 0.4178, + "step": 600 + }, + { + "epoch": 1.16, + "learning_rate": 0.0002942199168834342, + "loss": 0.3987, + "step": 601 + }, + { + "epoch": 1.16, + "learning_rate": 0.00029383930884031183, + "loss": 0.3861, + "step": 602 + }, + { + "epoch": 1.16, + "learning_rate": 0.00029345826451722005, + "loss": 0.5322, + "step": 603 + }, + { + "epoch": 1.16, + "learning_rate": 0.00029307678568571936, + "loss": 0.3997, + "step": 604 + }, + { + "epoch": 1.17, + "learning_rate": 0.0002926948741193903, + "loss": 0.4121, + "step": 605 + }, + { + "epoch": 1.17, + "learning_rate": 0.00029231253159382514, + "loss": 0.4931, + "step": 606 + }, + { + "epoch": 1.17, + "learning_rate": 0.00029192975988662017, + "loss": 0.4626, + "step": 607 + }, + { + "epoch": 1.17, + "learning_rate": 0.00029154656077736666, + "loss": 0.4441, + "step": 608 + }, + { + "epoch": 1.17, + "learning_rate": 0.0002911629360476432, + "loss": 0.3863, + "step": 609 + }, + { + "epoch": 1.18, + "learning_rate": 0.00029077888748100703, + "loss": 0.36, + "step": 610 + }, + { + "epoch": 1.18, + "learning_rate": 0.00029039441686298594, + "loss": 0.4246, + "step": 611 + }, + { + "epoch": 1.18, + "learning_rate": 0.0002900095259810702, + "loss": 0.4916, + "step": 612 + }, + { + "epoch": 1.18, + "learning_rate": 0.00028962421662470346, + "loss": 0.4896, + "step": 613 + }, + { + "epoch": 1.18, + "learning_rate": 0.00028923849058527535, + "loss": 0.4237, + "step": 614 + }, + { + "epoch": 1.18, + "learning_rate": 0.00028885234965611274, + "loss": 0.5727, + "step": 615 + }, + { + "epoch": 1.19, + "learning_rate": 0.00028846579563247116, + "loss": 0.5681, + "step": 616 + }, + { + "epoch": 1.19, + "learning_rate": 0.0002880788303115269, + "loss": 0.4383, + "step": 617 + }, + { + "epoch": 1.19, + "learning_rate": 0.00028769145549236845, + "loss": 0.4962, + "step": 618 + }, + { + "epoch": 1.19, + "learning_rate": 0.0002873036729759881, + "loss": 0.5472, + "step": 619 + }, + { + "epoch": 1.19, + "learning_rate": 0.0002869154845652738, + "loss": 0.5431, + "step": 620 + }, + { + "epoch": 1.2, + "learning_rate": 0.0002865268920650003, + "loss": 0.4152, + "step": 621 + }, + { + "epoch": 1.2, + "learning_rate": 0.0002861378972818211, + "loss": 0.3922, + "step": 622 + }, + { + "epoch": 1.2, + "learning_rate": 0.0002857485020242602, + "loss": 0.5129, + "step": 623 + }, + { + "epoch": 1.2, + "learning_rate": 0.0002853587081027034, + "loss": 0.4328, + "step": 624 + }, + { + "epoch": 1.2, + "learning_rate": 0.00028496851732938997, + "loss": 0.4431, + "step": 625 + }, + { + "epoch": 1.21, + "learning_rate": 0.0002845779315184042, + "loss": 0.4968, + "step": 626 + }, + { + "epoch": 1.21, + "learning_rate": 0.000284186952485667, + "loss": 0.5301, + "step": 627 + }, + { + "epoch": 1.21, + "learning_rate": 0.0002837955820489276, + "loss": 0.4332, + "step": 628 + }, + { + "epoch": 1.21, + "learning_rate": 0.0002834038220277546, + "loss": 0.4245, + "step": 629 + }, + { + "epoch": 1.21, + "learning_rate": 0.00028301167424352836, + "loss": 0.5057, + "step": 630 + }, + { + "epoch": 1.22, + "learning_rate": 0.00028261914051943166, + "loss": 0.4623, + "step": 631 + }, + { + "epoch": 1.22, + "learning_rate": 0.00028222622268044174, + "loss": 0.5452, + "step": 632 + }, + { + "epoch": 1.22, + "learning_rate": 0.00028183292255332164, + "loss": 0.5238, + "step": 633 + }, + { + "epoch": 1.22, + "learning_rate": 0.00028143924196661176, + "loss": 0.3966, + "step": 634 + }, + { + "epoch": 1.22, + "learning_rate": 0.0002810451827506214, + "loss": 0.35, + "step": 635 + }, + { + "epoch": 1.23, + "learning_rate": 0.00028065074673742007, + "loss": 0.4325, + "step": 636 + }, + { + "epoch": 1.23, + "learning_rate": 0.0002802559357608292, + "loss": 0.4854, + "step": 637 + }, + { + "epoch": 1.23, + "learning_rate": 0.00027986075165641343, + "loss": 0.4254, + "step": 638 + }, + { + "epoch": 1.23, + "learning_rate": 0.00027946519626147225, + "loss": 0.4614, + "step": 639 + }, + { + "epoch": 1.23, + "learning_rate": 0.00027906927141503125, + "loss": 0.3798, + "step": 640 + }, + { + "epoch": 1.23, + "learning_rate": 0.00027867297895783373, + "loss": 0.4742, + "step": 641 + }, + { + "epoch": 1.24, + "learning_rate": 0.0002782763207323322, + "loss": 0.4007, + "step": 642 + }, + { + "epoch": 1.24, + "learning_rate": 0.0002778792985826795, + "loss": 0.4383, + "step": 643 + }, + { + "epoch": 1.24, + "learning_rate": 0.0002774819143547206, + "loss": 0.4298, + "step": 644 + }, + { + "epoch": 1.24, + "learning_rate": 0.00027708416989598387, + "loss": 0.5178, + "step": 645 + }, + { + "epoch": 1.24, + "learning_rate": 0.0002766860670556722, + "loss": 0.3434, + "step": 646 + }, + { + "epoch": 1.25, + "learning_rate": 0.0002762876076846551, + "loss": 0.3862, + "step": 647 + }, + { + "epoch": 1.25, + "learning_rate": 0.00027588879363545934, + "loss": 0.4459, + "step": 648 + }, + { + "epoch": 1.25, + "learning_rate": 0.0002754896267622608, + "loss": 0.3934, + "step": 649 + }, + { + "epoch": 1.25, + "learning_rate": 0.00027509010892087565, + "loss": 0.4349, + "step": 650 + }, + { + "epoch": 1.25, + "learning_rate": 0.000274690241968752, + "loss": 0.4178, + "step": 651 + }, + { + "epoch": 1.26, + "learning_rate": 0.0002742900277649607, + "loss": 0.4151, + "step": 652 + }, + { + "epoch": 1.26, + "learning_rate": 0.0002738894681701874, + "loss": 0.3888, + "step": 653 + }, + { + "epoch": 1.26, + "learning_rate": 0.00027348856504672323, + "loss": 0.4214, + "step": 654 + }, + { + "epoch": 1.26, + "learning_rate": 0.0002730873202584567, + "loss": 0.519, + "step": 655 + }, + { + "epoch": 1.26, + "learning_rate": 0.00027268573567086477, + "loss": 0.5463, + "step": 656 + }, + { + "epoch": 1.27, + "learning_rate": 0.00027228381315100417, + "loss": 0.3367, + "step": 657 + }, + { + "epoch": 1.27, + "learning_rate": 0.00027188155456750256, + "loss": 0.4629, + "step": 658 + }, + { + "epoch": 1.27, + "learning_rate": 0.00027147896179055043, + "loss": 0.4456, + "step": 659 + }, + { + "epoch": 1.27, + "learning_rate": 0.0002710760366918917, + "loss": 0.4348, + "step": 660 + }, + { + "epoch": 1.27, + "learning_rate": 0.0002706727811448153, + "loss": 0.4505, + "step": 661 + }, + { + "epoch": 1.28, + "learning_rate": 0.0002702691970241468, + "loss": 0.5028, + "step": 662 + }, + { + "epoch": 1.28, + "learning_rate": 0.00026986528620623904, + "loss": 0.5257, + "step": 663 + }, + { + "epoch": 1.28, + "learning_rate": 0.00026946105056896403, + "loss": 0.4977, + "step": 664 + }, + { + "epoch": 1.28, + "learning_rate": 0.00026905649199170377, + "loss": 0.421, + "step": 665 + }, + { + "epoch": 1.28, + "learning_rate": 0.0002686516123553417, + "loss": 0.4931, + "step": 666 + }, + { + "epoch": 1.28, + "learning_rate": 0.00026824641354225397, + "loss": 0.5818, + "step": 667 + }, + { + "epoch": 1.29, + "learning_rate": 0.0002678408974363005, + "loss": 0.4211, + "step": 668 + }, + { + "epoch": 1.29, + "learning_rate": 0.00026743506592281674, + "loss": 0.5182, + "step": 669 + }, + { + "epoch": 1.29, + "learning_rate": 0.00026702892088860413, + "loss": 0.5591, + "step": 670 + }, + { + "epoch": 1.29, + "learning_rate": 0.0002666224642219221, + "loss": 0.5363, + "step": 671 + }, + { + "epoch": 1.29, + "learning_rate": 0.0002662156978124786, + "loss": 0.5866, + "step": 672 + }, + { + "epoch": 1.3, + "learning_rate": 0.0002658086235514218, + "loss": 0.422, + "step": 673 + }, + { + "epoch": 1.3, + "learning_rate": 0.0002654012433313312, + "loss": 0.5375, + "step": 674 + }, + { + "epoch": 1.3, + "learning_rate": 0.0002649935590462087, + "loss": 0.4752, + "step": 675 + }, + { + "epoch": 1.3, + "learning_rate": 0.00026458557259146986, + "loss": 0.4271, + "step": 676 + }, + { + "epoch": 1.3, + "learning_rate": 0.0002641772858639351, + "loss": 0.4843, + "step": 677 + }, + { + "epoch": 1.31, + "learning_rate": 0.00026376870076182086, + "loss": 0.4827, + "step": 678 + }, + { + "epoch": 1.31, + "learning_rate": 0.00026335981918473086, + "loss": 0.47, + "step": 679 + }, + { + "epoch": 1.31, + "learning_rate": 0.0002629506430336472, + "loss": 0.368, + "step": 680 + }, + { + "epoch": 1.31, + "learning_rate": 0.00026254117421092133, + "loss": 0.481, + "step": 681 + }, + { + "epoch": 1.31, + "learning_rate": 0.0002621314146202656, + "loss": 0.4153, + "step": 682 + }, + { + "epoch": 1.32, + "learning_rate": 0.0002617213661667443, + "loss": 0.4397, + "step": 683 + }, + { + "epoch": 1.32, + "learning_rate": 0.0002613110307567643, + "loss": 0.4052, + "step": 684 + }, + { + "epoch": 1.32, + "learning_rate": 0.00026090041029806695, + "loss": 0.4652, + "step": 685 + }, + { + "epoch": 1.32, + "learning_rate": 0.00026048950669971884, + "loss": 0.3826, + "step": 686 + }, + { + "epoch": 1.32, + "learning_rate": 0.00026007832187210277, + "loss": 0.5639, + "step": 687 + }, + { + "epoch": 1.33, + "learning_rate": 0.00025966685772690906, + "loss": 0.3917, + "step": 688 + }, + { + "epoch": 1.33, + "learning_rate": 0.00025925511617712685, + "loss": 0.5248, + "step": 689 + }, + { + "epoch": 1.33, + "learning_rate": 0.0002588430991370347, + "loss": 0.3796, + "step": 690 + }, + { + "epoch": 1.33, + "learning_rate": 0.0002584308085221922, + "loss": 0.4391, + "step": 691 + }, + { + "epoch": 1.33, + "learning_rate": 0.00025801824624943084, + "loss": 0.4514, + "step": 692 + }, + { + "epoch": 1.34, + "learning_rate": 0.00025760541423684496, + "loss": 0.5046, + "step": 693 + }, + { + "epoch": 1.34, + "learning_rate": 0.0002571923144037831, + "loss": 0.4578, + "step": 694 + }, + { + "epoch": 1.34, + "learning_rate": 0.0002567789486708389, + "loss": 0.4681, + "step": 695 + }, + { + "epoch": 1.34, + "learning_rate": 0.00025636531895984236, + "loss": 0.4501, + "step": 696 + }, + { + "epoch": 1.34, + "learning_rate": 0.0002559514271938506, + "loss": 0.4411, + "step": 697 + }, + { + "epoch": 1.34, + "learning_rate": 0.00025553727529713916, + "loss": 0.401, + "step": 698 + }, + { + "epoch": 1.35, + "learning_rate": 0.00025512286519519293, + "loss": 0.4911, + "step": 699 + }, + { + "epoch": 1.35, + "learning_rate": 0.0002547081988146974, + "loss": 0.3754, + "step": 700 + }, + { + "epoch": 1.35, + "learning_rate": 0.00025429327808352946, + "loss": 0.3807, + "step": 701 + }, + { + "epoch": 1.35, + "learning_rate": 0.0002538781049307486, + "loss": 0.4193, + "step": 702 + }, + { + "epoch": 1.35, + "learning_rate": 0.0002534626812865876, + "loss": 0.5259, + "step": 703 + }, + { + "epoch": 1.36, + "learning_rate": 0.00025304700908244433, + "loss": 0.3684, + "step": 704 + }, + { + "epoch": 1.36, + "learning_rate": 0.0002526310902508718, + "loss": 0.5423, + "step": 705 + }, + { + "epoch": 1.36, + "learning_rate": 0.0002522149267255699, + "loss": 0.4288, + "step": 706 + }, + { + "epoch": 1.36, + "learning_rate": 0.000251798520441376, + "loss": 0.5046, + "step": 707 + }, + { + "epoch": 1.36, + "learning_rate": 0.0002513818733342564, + "loss": 0.3777, + "step": 708 + }, + { + "epoch": 1.37, + "learning_rate": 0.00025096498734129667, + "loss": 0.5171, + "step": 709 + }, + { + "epoch": 1.37, + "learning_rate": 0.0002505478644006932, + "loss": 0.3785, + "step": 710 + }, + { + "epoch": 1.37, + "learning_rate": 0.00025013050645174414, + "loss": 0.5413, + "step": 711 + }, + { + "epoch": 1.37, + "learning_rate": 0.00024971291543483994, + "loss": 0.5018, + "step": 712 + }, + { + "epoch": 1.37, + "learning_rate": 0.00024929509329145477, + "loss": 0.5212, + "step": 713 + }, + { + "epoch": 1.38, + "learning_rate": 0.00024887704196413746, + "loss": 0.483, + "step": 714 + }, + { + "epoch": 1.38, + "learning_rate": 0.0002484587633965023, + "loss": 0.3684, + "step": 715 + }, + { + "epoch": 1.38, + "learning_rate": 0.00024804025953322005, + "loss": 0.3782, + "step": 716 + }, + { + "epoch": 1.38, + "learning_rate": 0.00024762153232000877, + "loss": 0.4995, + "step": 717 + }, + { + "epoch": 1.38, + "learning_rate": 0.0002472025837036253, + "loss": 0.4324, + "step": 718 + }, + { + "epoch": 1.39, + "learning_rate": 0.0002467834156318555, + "loss": 0.5203, + "step": 719 + }, + { + "epoch": 1.39, + "learning_rate": 0.0002463640300535057, + "loss": 0.423, + "step": 720 + }, + { + "epoch": 1.39, + "learning_rate": 0.0002459444289183933, + "loss": 0.4537, + "step": 721 + }, + { + "epoch": 1.39, + "learning_rate": 0.00024552461417733817, + "loss": 0.4124, + "step": 722 + }, + { + "epoch": 1.39, + "learning_rate": 0.0002451045877821528, + "loss": 0.4865, + "step": 723 + }, + { + "epoch": 1.39, + "learning_rate": 0.0002446843516856343, + "loss": 0.4845, + "step": 724 + }, + { + "epoch": 1.4, + "learning_rate": 0.00024426390784155425, + "loss": 0.4174, + "step": 725 + }, + { + "epoch": 1.4, + "learning_rate": 0.00024384325820465033, + "loss": 0.4456, + "step": 726 + }, + { + "epoch": 1.4, + "learning_rate": 0.0002434224047306169, + "loss": 0.4429, + "step": 727 + }, + { + "epoch": 1.4, + "learning_rate": 0.0002430013493760961, + "loss": 0.363, + "step": 728 + }, + { + "epoch": 1.4, + "learning_rate": 0.00024258009409866853, + "loss": 0.4769, + "step": 729 + }, + { + "epoch": 1.41, + "learning_rate": 0.00024215864085684442, + "loss": 0.4597, + "step": 730 + }, + { + "epoch": 1.41, + "learning_rate": 0.00024173699161005429, + "loss": 0.366, + "step": 731 + }, + { + "epoch": 1.41, + "learning_rate": 0.00024131514831863995, + "loss": 0.4746, + "step": 732 + }, + { + "epoch": 1.41, + "learning_rate": 0.0002408931129438453, + "loss": 0.5608, + "step": 733 + }, + { + "epoch": 1.41, + "learning_rate": 0.00024047088744780744, + "loss": 0.4292, + "step": 734 + }, + { + "epoch": 1.42, + "learning_rate": 0.00024004847379354726, + "loss": 0.4743, + "step": 735 + }, + { + "epoch": 1.42, + "learning_rate": 0.00023962587394496038, + "loss": 0.3855, + "step": 736 + }, + { + "epoch": 1.42, + "learning_rate": 0.00023920308986680834, + "loss": 0.4573, + "step": 737 + }, + { + "epoch": 1.42, + "learning_rate": 0.00023878012352470892, + "loss": 0.3937, + "step": 738 + }, + { + "epoch": 1.42, + "learning_rate": 0.0002383569768851274, + "loss": 0.4371, + "step": 739 + }, + { + "epoch": 1.43, + "learning_rate": 0.00023793365191536735, + "loss": 0.5432, + "step": 740 + }, + { + "epoch": 1.43, + "learning_rate": 0.00023751015058356135, + "loss": 0.4803, + "step": 741 + }, + { + "epoch": 1.43, + "learning_rate": 0.000237086474858662, + "loss": 0.4281, + "step": 742 + }, + { + "epoch": 1.43, + "learning_rate": 0.00023666262671043263, + "loss": 0.4031, + "step": 743 + }, + { + "epoch": 1.43, + "learning_rate": 0.00023623860810943826, + "loss": 0.4725, + "step": 744 + }, + { + "epoch": 1.44, + "learning_rate": 0.0002358144210270364, + "loss": 0.4644, + "step": 745 + }, + { + "epoch": 1.44, + "learning_rate": 0.00023539006743536774, + "loss": 0.4848, + "step": 746 + }, + { + "epoch": 1.44, + "learning_rate": 0.00023496554930734718, + "loss": 0.4084, + "step": 747 + }, + { + "epoch": 1.44, + "learning_rate": 0.00023454086861665472, + "loss": 0.4322, + "step": 748 + }, + { + "epoch": 1.44, + "learning_rate": 0.00023411602733772595, + "loss": 0.4847, + "step": 749 + }, + { + "epoch": 1.44, + "learning_rate": 0.00023369102744574312, + "loss": 0.4298, + "step": 750 + }, + { + "epoch": 1.45, + "learning_rate": 0.00023326587091662603, + "loss": 0.4268, + "step": 751 + }, + { + "epoch": 1.45, + "learning_rate": 0.00023284055972702254, + "loss": 0.4089, + "step": 752 + }, + { + "epoch": 1.45, + "learning_rate": 0.0002324150958542997, + "loss": 0.4214, + "step": 753 + }, + { + "epoch": 1.45, + "learning_rate": 0.00023198948127653446, + "loss": 0.5576, + "step": 754 + }, + { + "epoch": 1.45, + "learning_rate": 0.00023156371797250418, + "loss": 0.4377, + "step": 755 + }, + { + "epoch": 1.46, + "learning_rate": 0.00023113780792167785, + "loss": 0.4934, + "step": 756 + }, + { + "epoch": 1.46, + "learning_rate": 0.0002307117531042068, + "loss": 0.3698, + "step": 757 + }, + { + "epoch": 1.46, + "learning_rate": 0.00023028555550091536, + "loss": 0.4722, + "step": 758 + }, + { + "epoch": 1.46, + "learning_rate": 0.00022985921709329157, + "loss": 0.3837, + "step": 759 + }, + { + "epoch": 1.46, + "learning_rate": 0.00022943273986347822, + "loss": 0.5132, + "step": 760 + }, + { + "epoch": 1.47, + "learning_rate": 0.0002290061257942635, + "loss": 0.487, + "step": 761 + }, + { + "epoch": 1.47, + "learning_rate": 0.00022857937686907183, + "loss": 0.3857, + "step": 762 + }, + { + "epoch": 1.47, + "learning_rate": 0.00022815249507195445, + "loss": 0.4135, + "step": 763 + }, + { + "epoch": 1.47, + "learning_rate": 0.00022772548238758064, + "loss": 0.4639, + "step": 764 + }, + { + "epoch": 1.47, + "learning_rate": 0.00022729834080122791, + "loss": 0.5297, + "step": 765 + }, + { + "epoch": 1.48, + "learning_rate": 0.00022687107229877324, + "loss": 0.4485, + "step": 766 + }, + { + "epoch": 1.48, + "learning_rate": 0.00022644367886668357, + "loss": 0.467, + "step": 767 + }, + { + "epoch": 1.48, + "learning_rate": 0.00022601616249200675, + "loss": 0.4304, + "step": 768 + }, + { + "epoch": 1.48, + "learning_rate": 0.00022558852516236217, + "loss": 0.5531, + "step": 769 + }, + { + "epoch": 1.48, + "learning_rate": 0.00022516076886593158, + "loss": 0.5021, + "step": 770 + }, + { + "epoch": 1.49, + "learning_rate": 0.00022473289559144988, + "loss": 0.433, + "step": 771 + }, + { + "epoch": 1.49, + "learning_rate": 0.00022430490732819566, + "loss": 0.505, + "step": 772 + }, + { + "epoch": 1.49, + "learning_rate": 0.00022387680606598235, + "loss": 0.4677, + "step": 773 + }, + { + "epoch": 1.49, + "learning_rate": 0.00022344859379514858, + "loss": 0.4421, + "step": 774 + }, + { + "epoch": 1.49, + "learning_rate": 0.00022302027250654905, + "loss": 0.4282, + "step": 775 + }, + { + "epoch": 1.49, + "learning_rate": 0.0002225918441915456, + "loss": 0.366, + "step": 776 + }, + { + "epoch": 1.5, + "learning_rate": 0.00022216331084199724, + "loss": 0.4147, + "step": 777 + }, + { + "epoch": 1.5, + "learning_rate": 0.00022173467445025158, + "loss": 0.586, + "step": 778 + }, + { + "epoch": 1.5, + "learning_rate": 0.00022130593700913522, + "loss": 0.5285, + "step": 779 + }, + { + "epoch": 1.5, + "learning_rate": 0.00022087710051194463, + "loss": 0.4484, + "step": 780 + }, + { + "epoch": 1.5, + "learning_rate": 0.0002204481669524367, + "loss": 0.4063, + "step": 781 + }, + { + "epoch": 1.51, + "learning_rate": 0.0002200191383248197, + "loss": 0.4751, + "step": 782 + }, + { + "epoch": 1.51, + "learning_rate": 0.00021959001662374373, + "loss": 0.3936, + "step": 783 + }, + { + "epoch": 1.51, + "learning_rate": 0.00021916080384429184, + "loss": 0.4433, + "step": 784 + }, + { + "epoch": 1.51, + "learning_rate": 0.0002187315019819703, + "loss": 0.4883, + "step": 785 + }, + { + "epoch": 1.51, + "learning_rate": 0.00021830211303269965, + "loss": 0.4925, + "step": 786 + }, + { + "epoch": 1.52, + "learning_rate": 0.00021787263899280537, + "loss": 0.4597, + "step": 787 + }, + { + "epoch": 1.52, + "learning_rate": 0.00021744308185900848, + "loss": 0.4954, + "step": 788 + }, + { + "epoch": 1.52, + "learning_rate": 0.00021701344362841626, + "loss": 0.4025, + "step": 789 + }, + { + "epoch": 1.52, + "learning_rate": 0.00021658372629851318, + "loss": 0.5734, + "step": 790 + }, + { + "epoch": 1.52, + "learning_rate": 0.00021615393186715128, + "loss": 0.3779, + "step": 791 + }, + { + "epoch": 1.53, + "learning_rate": 0.00021572406233254116, + "loss": 0.4994, + "step": 792 + }, + { + "epoch": 1.53, + "learning_rate": 0.00021529411969324275, + "loss": 0.5359, + "step": 793 + }, + { + "epoch": 1.53, + "learning_rate": 0.00021486410594815554, + "loss": 0.4738, + "step": 794 + }, + { + "epoch": 1.53, + "learning_rate": 0.00021443402309650979, + "loss": 0.4915, + "step": 795 + }, + { + "epoch": 1.53, + "learning_rate": 0.00021400387313785704, + "loss": 0.4991, + "step": 796 + }, + { + "epoch": 1.54, + "learning_rate": 0.00021357365807206087, + "loss": 0.4503, + "step": 797 + }, + { + "epoch": 1.54, + "learning_rate": 0.0002131433798992874, + "loss": 0.4887, + "step": 798 + }, + { + "epoch": 1.54, + "learning_rate": 0.00021271304061999633, + "loss": 0.4279, + "step": 799 + }, + { + "epoch": 1.54, + "learning_rate": 0.00021228264223493139, + "loss": 0.5367, + "step": 800 + }, + { + "epoch": 1.54, + "learning_rate": 0.00021185218674511097, + "loss": 0.3212, + "step": 801 + }, + { + "epoch": 1.54, + "learning_rate": 0.00021142167615181915, + "loss": 0.493, + "step": 802 + }, + { + "epoch": 1.55, + "learning_rate": 0.0002109911124565962, + "loss": 0.3635, + "step": 803 + }, + { + "epoch": 1.55, + "learning_rate": 0.00021056049766122916, + "loss": 0.3494, + "step": 804 + }, + { + "epoch": 1.55, + "learning_rate": 0.00021012983376774254, + "loss": 0.3536, + "step": 805 + }, + { + "epoch": 1.55, + "learning_rate": 0.0002096991227783895, + "loss": 0.4335, + "step": 806 + }, + { + "epoch": 1.55, + "learning_rate": 0.00020926836669564168, + "loss": 0.4673, + "step": 807 + }, + { + "epoch": 1.56, + "learning_rate": 0.00020883756752218075, + "loss": 0.5269, + "step": 808 + }, + { + "epoch": 1.56, + "learning_rate": 0.0002084067272608886, + "loss": 0.4365, + "step": 809 + }, + { + "epoch": 1.56, + "learning_rate": 0.00020797584791483806, + "loss": 0.2998, + "step": 810 + }, + { + "epoch": 1.56, + "learning_rate": 0.00020754493148728375, + "loss": 0.4318, + "step": 811 + }, + { + "epoch": 1.56, + "learning_rate": 0.00020711397998165264, + "loss": 0.3205, + "step": 812 + }, + { + "epoch": 1.57, + "learning_rate": 0.00020668299540153493, + "loss": 0.5026, + "step": 813 + }, + { + "epoch": 1.57, + "learning_rate": 0.00020625197975067438, + "loss": 0.3923, + "step": 814 + }, + { + "epoch": 1.57, + "learning_rate": 0.0002058209350329594, + "loss": 0.3026, + "step": 815 + }, + { + "epoch": 1.57, + "learning_rate": 0.00020538986325241342, + "loss": 0.4729, + "step": 816 + }, + { + "epoch": 1.57, + "learning_rate": 0.00020495876641318567, + "loss": 0.3236, + "step": 817 + }, + { + "epoch": 1.58, + "learning_rate": 0.0002045276465195419, + "loss": 0.4934, + "step": 818 + }, + { + "epoch": 1.58, + "learning_rate": 0.00020409650557585523, + "loss": 0.3791, + "step": 819 + }, + { + "epoch": 1.58, + "learning_rate": 0.00020366534558659635, + "loss": 0.3879, + "step": 820 + }, + { + "epoch": 1.58, + "learning_rate": 0.00020323416855632477, + "loss": 0.3807, + "step": 821 + }, + { + "epoch": 1.58, + "learning_rate": 0.00020280297648967897, + "loss": 0.415, + "step": 822 + }, + { + "epoch": 1.59, + "learning_rate": 0.00020237177139136758, + "loss": 0.3824, + "step": 823 + }, + { + "epoch": 1.59, + "learning_rate": 0.0002019405552661596, + "loss": 0.3854, + "step": 824 + }, + { + "epoch": 1.59, + "learning_rate": 0.00020150933011887543, + "loss": 0.3781, + "step": 825 + }, + { + "epoch": 1.59, + "learning_rate": 0.00020107809795437745, + "loss": 0.3904, + "step": 826 + }, + { + "epoch": 1.59, + "learning_rate": 0.00020064686077756057, + "loss": 0.3213, + "step": 827 + }, + { + "epoch": 1.6, + "learning_rate": 0.00020021562059334302, + "loss": 0.4888, + "step": 828 + }, + { + "epoch": 1.6, + "learning_rate": 0.00019978437940665702, + "loss": 0.3375, + "step": 829 + }, + { + "epoch": 1.6, + "learning_rate": 0.0001993531392224394, + "loss": 0.4325, + "step": 830 + }, + { + "epoch": 1.6, + "learning_rate": 0.00019892190204562257, + "loss": 0.2865, + "step": 831 + }, + { + "epoch": 1.6, + "learning_rate": 0.0001984906698811246, + "loss": 0.3984, + "step": 832 + }, + { + "epoch": 1.6, + "learning_rate": 0.00019805944473384038, + "loss": 0.3011, + "step": 833 + }, + { + "epoch": 1.61, + "learning_rate": 0.00019762822860863247, + "loss": 0.4063, + "step": 834 + }, + { + "epoch": 1.61, + "learning_rate": 0.00019719702351032105, + "loss": 0.4489, + "step": 835 + }, + { + "epoch": 1.61, + "learning_rate": 0.00019676583144367525, + "loss": 0.3991, + "step": 836 + }, + { + "epoch": 1.61, + "learning_rate": 0.00019633465441340367, + "loss": 0.2736, + "step": 837 + }, + { + "epoch": 1.61, + "learning_rate": 0.00019590349442414484, + "loss": 0.3842, + "step": 838 + }, + { + "epoch": 1.62, + "learning_rate": 0.0001954723534804581, + "loss": 0.346, + "step": 839 + }, + { + "epoch": 1.62, + "learning_rate": 0.0001950412335868144, + "loss": 0.3435, + "step": 840 + }, + { + "epoch": 1.62, + "learning_rate": 0.00019461013674758668, + "loss": 0.4139, + "step": 841 + }, + { + "epoch": 1.62, + "learning_rate": 0.00019417906496704064, + "loss": 0.4031, + "step": 842 + }, + { + "epoch": 1.62, + "learning_rate": 0.00019374802024932567, + "loss": 0.352, + "step": 843 + }, + { + "epoch": 1.63, + "learning_rate": 0.00019331700459846517, + "loss": 0.332, + "step": 844 + }, + { + "epoch": 1.63, + "learning_rate": 0.00019288602001834735, + "loss": 0.3822, + "step": 845 + }, + { + "epoch": 1.63, + "learning_rate": 0.00019245506851271632, + "loss": 0.3912, + "step": 846 + }, + { + "epoch": 1.63, + "learning_rate": 0.000192024152085162, + "loss": 0.4028, + "step": 847 + }, + { + "epoch": 1.63, + "learning_rate": 0.00019159327273911145, + "loss": 0.3625, + "step": 848 + }, + { + "epoch": 1.64, + "learning_rate": 0.00019116243247781927, + "loss": 0.392, + "step": 849 + }, + { + "epoch": 1.64, + "learning_rate": 0.00019073163330435842, + "loss": 0.3687, + "step": 850 + } + ], + "logging_steps": 1, + "max_steps": 1557, + "num_train_epochs": 3, + "save_steps": 50, + "total_flos": 1.1381421486723564e+18, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-850/training_args.bin b/checkpoint-850/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..c4843df7aa1383a371fb28dea27d303b1a1145e1 --- /dev/null +++ b/checkpoint-850/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:637e0437b5818f76ea2fea2aa5b87010fc39a85bdfc12277d436c72e69d11811 +size 4155 diff --git a/checkpoint-900/README.md b/checkpoint-900/README.md new file mode 100644 index 0000000000000000000000000000000000000000..b32efe7366f05d1d90816d2ad9e4b06ccca46bea --- /dev/null +++ b/checkpoint-900/README.md @@ -0,0 +1,219 @@ +--- +library_name: peft +base_model: mistralai/Mistral-7B-v0.1 +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] + + +## Training procedure + + +The following `bitsandbytes` quantization config was used during training: +- quant_method: bitsandbytes +- load_in_8bit: False +- load_in_4bit: True +- llm_int8_threshold: 6.0 +- llm_int8_skip_modules: None +- llm_int8_enable_fp32_cpu_offload: False +- llm_int8_has_fp16_weight: False +- bnb_4bit_quant_type: nf4 +- bnb_4bit_use_double_quant: True +- bnb_4bit_compute_dtype: float16 + +### Framework versions + + +- PEFT 0.6.0.dev0 diff --git a/checkpoint-900/adapter_config.json b/checkpoint-900/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..4e108f2da037ef6250457c67a4bedd308d97303c --- /dev/null +++ b/checkpoint-900/adapter_config.json @@ -0,0 +1,24 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "mistralai/Mistral-7B-v0.1", + "bias": "none", + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layers_pattern": null, + "layers_to_transform": null, + "lora_alpha": 16, + "lora_dropout": 0.05, + "modules_to_save": null, + "peft_type": "LORA", + "r": 16, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "gate_proj", + "down_proj", + "up_proj" + ], + "task_type": "CAUSAL_LM" +} \ No newline at end of file diff --git a/checkpoint-900/adapter_model.bin b/checkpoint-900/adapter_model.bin new file mode 100644 index 0000000000000000000000000000000000000000..b6aa9a248091ec8f16fffc872e10a7464bb96f4e --- /dev/null +++ b/checkpoint-900/adapter_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8dc5bb6d2dca558111c7797c47c4ff80cfc300763019d10afd07349ed4c4e7c2 +size 113314765 diff --git a/checkpoint-900/optimizer.pt b/checkpoint-900/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..dd5a0d885972b9a095f4de24f891cc01bd808b5e --- /dev/null +++ b/checkpoint-900/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:409911090683a3164a6f094e871c2ec66d7845a881e203a0c6a8accccd39d49a +size 226653957 diff --git a/checkpoint-900/rng_state.pth b/checkpoint-900/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..d8ac1e9824eadc10d5ab36b9f1e4f5b11bed815c --- /dev/null +++ b/checkpoint-900/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5316df01292faa701fefb2dbf38969222c4d35108e40f06ea0b5b51d1ebb790f +size 14575 diff --git a/checkpoint-900/scheduler.pt b/checkpoint-900/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..29da9fabb05fa99da84ff222f0dd6768751b4ea1 --- /dev/null +++ b/checkpoint-900/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1e40400b355c64557f8d6f8ceef4b48a2f1b102c17edff2c48b8bb46663570c3 +size 627 diff --git a/checkpoint-900/trainer_state.json b/checkpoint-900/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..edbefbc12c210531da10262ffecc1d30baab6db5 --- /dev/null +++ b/checkpoint-900/trainer_state.json @@ -0,0 +1,5419 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 1.7336622938981827, + "eval_steps": 500, + "global_step": 900, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0, + "learning_rate": 4.000000000000001e-06, + "loss": 0.6869, + "step": 1 + }, + { + "epoch": 0.0, + "learning_rate": 8.000000000000001e-06, + "loss": 0.8396, + "step": 2 + }, + { + "epoch": 0.01, + "learning_rate": 1.2e-05, + "loss": 0.7489, + "step": 3 + }, + { + "epoch": 0.01, + "learning_rate": 1.6000000000000003e-05, + "loss": 0.7252, + "step": 4 + }, + { + "epoch": 0.01, + "learning_rate": 2e-05, + "loss": 0.6548, + "step": 5 + }, + { + "epoch": 0.01, + "learning_rate": 2.4e-05, + "loss": 0.8022, + "step": 6 + }, + { + "epoch": 0.01, + "learning_rate": 2.8000000000000003e-05, + "loss": 0.6524, + "step": 7 + }, + { + "epoch": 0.02, + "learning_rate": 3.2000000000000005e-05, + "loss": 0.6981, + "step": 8 + }, + { + "epoch": 0.02, + "learning_rate": 3.6e-05, + "loss": 0.7488, + "step": 9 + }, + { + "epoch": 0.02, + "learning_rate": 4e-05, + "loss": 0.6368, + "step": 10 + }, + { + "epoch": 0.02, + "learning_rate": 4.4000000000000006e-05, + "loss": 0.6891, + "step": 11 + }, + { + "epoch": 0.02, + "learning_rate": 4.8e-05, + "loss": 0.7968, + "step": 12 + }, + { + "epoch": 0.03, + "learning_rate": 5.2000000000000004e-05, + "loss": 0.6912, + "step": 13 + }, + { + "epoch": 0.03, + "learning_rate": 5.6000000000000006e-05, + "loss": 0.8452, + "step": 14 + }, + { + "epoch": 0.03, + "learning_rate": 6e-05, + "loss": 0.6989, + "step": 15 + }, + { + "epoch": 0.03, + "learning_rate": 6.400000000000001e-05, + "loss": 0.6685, + "step": 16 + }, + { + "epoch": 0.03, + "learning_rate": 6.800000000000001e-05, + "loss": 0.5469, + "step": 17 + }, + { + "epoch": 0.03, + "learning_rate": 7.2e-05, + "loss": 0.7915, + "step": 18 + }, + { + "epoch": 0.04, + "learning_rate": 7.6e-05, + "loss": 0.7744, + "step": 19 + }, + { + "epoch": 0.04, + "learning_rate": 8e-05, + "loss": 0.6804, + "step": 20 + }, + { + "epoch": 0.04, + "learning_rate": 8.4e-05, + "loss": 0.7796, + "step": 21 + }, + { + "epoch": 0.04, + "learning_rate": 8.800000000000001e-05, + "loss": 0.706, + "step": 22 + }, + { + "epoch": 0.04, + "learning_rate": 9.200000000000001e-05, + "loss": 0.6798, + "step": 23 + }, + { + "epoch": 0.05, + "learning_rate": 9.6e-05, + "loss": 0.6333, + "step": 24 + }, + { + "epoch": 0.05, + "learning_rate": 0.0001, + "loss": 0.6012, + "step": 25 + }, + { + "epoch": 0.05, + "learning_rate": 0.00010400000000000001, + "loss": 0.52, + "step": 26 + }, + { + "epoch": 0.05, + "learning_rate": 0.00010800000000000001, + "loss": 0.6583, + "step": 27 + }, + { + "epoch": 0.05, + "learning_rate": 0.00011200000000000001, + "loss": 0.7354, + "step": 28 + }, + { + "epoch": 0.06, + "learning_rate": 0.000116, + "loss": 0.6296, + "step": 29 + }, + { + "epoch": 0.06, + "learning_rate": 0.00012, + "loss": 0.6352, + "step": 30 + }, + { + "epoch": 0.06, + "learning_rate": 0.000124, + "loss": 0.6007, + "step": 31 + }, + { + "epoch": 0.06, + "learning_rate": 0.00012800000000000002, + "loss": 0.5659, + "step": 32 + }, + { + "epoch": 0.06, + "learning_rate": 0.000132, + "loss": 0.5138, + "step": 33 + }, + { + "epoch": 0.07, + "learning_rate": 0.00013600000000000003, + "loss": 0.6639, + "step": 34 + }, + { + "epoch": 0.07, + "learning_rate": 0.00014, + "loss": 0.5934, + "step": 35 + }, + { + "epoch": 0.07, + "learning_rate": 0.000144, + "loss": 0.5233, + "step": 36 + }, + { + "epoch": 0.07, + "learning_rate": 0.000148, + "loss": 0.5307, + "step": 37 + }, + { + "epoch": 0.07, + "learning_rate": 0.000152, + "loss": 0.5928, + "step": 38 + }, + { + "epoch": 0.08, + "learning_rate": 0.00015600000000000002, + "loss": 0.5908, + "step": 39 + }, + { + "epoch": 0.08, + "learning_rate": 0.00016, + "loss": 0.6366, + "step": 40 + }, + { + "epoch": 0.08, + "learning_rate": 0.000164, + "loss": 0.5972, + "step": 41 + }, + { + "epoch": 0.08, + "learning_rate": 0.000168, + "loss": 0.4825, + "step": 42 + }, + { + "epoch": 0.08, + "learning_rate": 0.000172, + "loss": 0.6783, + "step": 43 + }, + { + "epoch": 0.08, + "learning_rate": 0.00017600000000000002, + "loss": 0.6082, + "step": 44 + }, + { + "epoch": 0.09, + "learning_rate": 0.00018, + "loss": 0.7633, + "step": 45 + }, + { + "epoch": 0.09, + "learning_rate": 0.00018400000000000003, + "loss": 0.5988, + "step": 46 + }, + { + "epoch": 0.09, + "learning_rate": 0.000188, + "loss": 0.6658, + "step": 47 + }, + { + "epoch": 0.09, + "learning_rate": 0.000192, + "loss": 0.5945, + "step": 48 + }, + { + "epoch": 0.09, + "learning_rate": 0.000196, + "loss": 0.5984, + "step": 49 + }, + { + "epoch": 0.1, + "learning_rate": 0.0002, + "loss": 0.6778, + "step": 50 + }, + { + "epoch": 0.1, + "learning_rate": 0.00020400000000000003, + "loss": 0.6057, + "step": 51 + }, + { + "epoch": 0.1, + "learning_rate": 0.00020800000000000001, + "loss": 0.601, + "step": 52 + }, + { + "epoch": 0.1, + "learning_rate": 0.00021200000000000003, + "loss": 0.5566, + "step": 53 + }, + { + "epoch": 0.1, + "learning_rate": 0.00021600000000000002, + "loss": 0.5911, + "step": 54 + }, + { + "epoch": 0.11, + "learning_rate": 0.00022000000000000003, + "loss": 0.7636, + "step": 55 + }, + { + "epoch": 0.11, + "learning_rate": 0.00022400000000000002, + "loss": 0.5537, + "step": 56 + }, + { + "epoch": 0.11, + "learning_rate": 0.00022799999999999999, + "loss": 0.6037, + "step": 57 + }, + { + "epoch": 0.11, + "learning_rate": 0.000232, + "loss": 0.6474, + "step": 58 + }, + { + "epoch": 0.11, + "learning_rate": 0.000236, + "loss": 0.6483, + "step": 59 + }, + { + "epoch": 0.12, + "learning_rate": 0.00024, + "loss": 0.5021, + "step": 60 + }, + { + "epoch": 0.12, + "learning_rate": 0.000244, + "loss": 0.5347, + "step": 61 + }, + { + "epoch": 0.12, + "learning_rate": 0.000248, + "loss": 0.5791, + "step": 62 + }, + { + "epoch": 0.12, + "learning_rate": 0.000252, + "loss": 0.5407, + "step": 63 + }, + { + "epoch": 0.12, + "learning_rate": 0.00025600000000000004, + "loss": 0.5298, + "step": 64 + }, + { + "epoch": 0.13, + "learning_rate": 0.00026000000000000003, + "loss": 0.5685, + "step": 65 + }, + { + "epoch": 0.13, + "learning_rate": 0.000264, + "loss": 0.5108, + "step": 66 + }, + { + "epoch": 0.13, + "learning_rate": 0.000268, + "loss": 0.526, + "step": 67 + }, + { + "epoch": 0.13, + "learning_rate": 0.00027200000000000005, + "loss": 0.6843, + "step": 68 + }, + { + "epoch": 0.13, + "learning_rate": 0.000276, + "loss": 0.6608, + "step": 69 + }, + { + "epoch": 0.13, + "learning_rate": 0.00028, + "loss": 0.5866, + "step": 70 + }, + { + "epoch": 0.14, + "learning_rate": 0.000284, + "loss": 0.6422, + "step": 71 + }, + { + "epoch": 0.14, + "learning_rate": 0.000288, + "loss": 0.449, + "step": 72 + }, + { + "epoch": 0.14, + "learning_rate": 0.000292, + "loss": 0.5319, + "step": 73 + }, + { + "epoch": 0.14, + "learning_rate": 0.000296, + "loss": 0.5977, + "step": 74 + }, + { + "epoch": 0.14, + "learning_rate": 0.00030000000000000003, + "loss": 0.5805, + "step": 75 + }, + { + "epoch": 0.15, + "learning_rate": 0.000304, + "loss": 0.5209, + "step": 76 + }, + { + "epoch": 0.15, + "learning_rate": 0.000308, + "loss": 0.6098, + "step": 77 + }, + { + "epoch": 0.15, + "learning_rate": 0.00031200000000000005, + "loss": 0.4665, + "step": 78 + }, + { + "epoch": 0.15, + "learning_rate": 0.00031600000000000004, + "loss": 0.6882, + "step": 79 + }, + { + "epoch": 0.15, + "learning_rate": 0.00032, + "loss": 0.5427, + "step": 80 + }, + { + "epoch": 0.16, + "learning_rate": 0.000324, + "loss": 0.5345, + "step": 81 + }, + { + "epoch": 0.16, + "learning_rate": 0.000328, + "loss": 0.663, + "step": 82 + }, + { + "epoch": 0.16, + "learning_rate": 0.000332, + "loss": 0.5393, + "step": 83 + }, + { + "epoch": 0.16, + "learning_rate": 0.000336, + "loss": 0.5711, + "step": 84 + }, + { + "epoch": 0.16, + "learning_rate": 0.00034, + "loss": 0.5261, + "step": 85 + }, + { + "epoch": 0.17, + "learning_rate": 0.000344, + "loss": 0.5775, + "step": 86 + }, + { + "epoch": 0.17, + "learning_rate": 0.000348, + "loss": 0.6329, + "step": 87 + }, + { + "epoch": 0.17, + "learning_rate": 0.00035200000000000005, + "loss": 0.4425, + "step": 88 + }, + { + "epoch": 0.17, + "learning_rate": 0.00035600000000000003, + "loss": 0.6837, + "step": 89 + }, + { + "epoch": 0.17, + "learning_rate": 0.00036, + "loss": 0.615, + "step": 90 + }, + { + "epoch": 0.18, + "learning_rate": 0.000364, + "loss": 0.5615, + "step": 91 + }, + { + "epoch": 0.18, + "learning_rate": 0.00036800000000000005, + "loss": 0.5434, + "step": 92 + }, + { + "epoch": 0.18, + "learning_rate": 0.00037200000000000004, + "loss": 0.5864, + "step": 93 + }, + { + "epoch": 0.18, + "learning_rate": 0.000376, + "loss": 0.5583, + "step": 94 + }, + { + "epoch": 0.18, + "learning_rate": 0.00038, + "loss": 0.5299, + "step": 95 + }, + { + "epoch": 0.18, + "learning_rate": 0.000384, + "loss": 0.532, + "step": 96 + }, + { + "epoch": 0.19, + "learning_rate": 0.000388, + "loss": 0.5227, + "step": 97 + }, + { + "epoch": 0.19, + "learning_rate": 0.000392, + "loss": 0.5275, + "step": 98 + }, + { + "epoch": 0.19, + "learning_rate": 0.00039600000000000003, + "loss": 0.4541, + "step": 99 + }, + { + "epoch": 0.19, + "learning_rate": 0.0004, + "loss": 0.6485, + "step": 100 + }, + { + "epoch": 0.19, + "learning_rate": 0.0003999995350775973, + "loss": 0.5438, + "step": 101 + }, + { + "epoch": 0.2, + "learning_rate": 0.00039999814031255063, + "loss": 0.5997, + "step": 102 + }, + { + "epoch": 0.2, + "learning_rate": 0.00039999581571134455, + "loss": 0.5322, + "step": 103 + }, + { + "epoch": 0.2, + "learning_rate": 0.0003999925612847867, + "loss": 0.484, + "step": 104 + }, + { + "epoch": 0.2, + "learning_rate": 0.00039998837704800766, + "loss": 0.5961, + "step": 105 + }, + { + "epoch": 0.2, + "learning_rate": 0.00039998326302046085, + "loss": 0.7405, + "step": 106 + }, + { + "epoch": 0.21, + "learning_rate": 0.00039997721922592255, + "loss": 0.5802, + "step": 107 + }, + { + "epoch": 0.21, + "learning_rate": 0.00039997024569249167, + "loss": 0.769, + "step": 108 + }, + { + "epoch": 0.21, + "learning_rate": 0.0003999623424525898, + "loss": 0.5598, + "step": 109 + }, + { + "epoch": 0.21, + "learning_rate": 0.0003999535095429608, + "loss": 0.6143, + "step": 110 + }, + { + "epoch": 0.21, + "learning_rate": 0.00039994374700467095, + "loss": 0.5766, + "step": 111 + }, + { + "epoch": 0.22, + "learning_rate": 0.00039993305488310836, + "loss": 0.7695, + "step": 112 + }, + { + "epoch": 0.22, + "learning_rate": 0.0003999214332279831, + "loss": 0.7153, + "step": 113 + }, + { + "epoch": 0.22, + "learning_rate": 0.0003999088820933269, + "loss": 0.5835, + "step": 114 + }, + { + "epoch": 0.22, + "learning_rate": 0.00039989540153749286, + "loss": 0.6634, + "step": 115 + }, + { + "epoch": 0.22, + "learning_rate": 0.000399880991623155, + "loss": 0.6069, + "step": 116 + }, + { + "epoch": 0.23, + "learning_rate": 0.0003998656524173082, + "loss": 0.7224, + "step": 117 + }, + { + "epoch": 0.23, + "learning_rate": 0.000399849383991268, + "loss": 0.5884, + "step": 118 + }, + { + "epoch": 0.23, + "learning_rate": 0.0003998321864206699, + "loss": 0.5122, + "step": 119 + }, + { + "epoch": 0.23, + "learning_rate": 0.00039981405978546924, + "loss": 0.6453, + "step": 120 + }, + { + "epoch": 0.23, + "learning_rate": 0.0003997950041699408, + "loss": 0.4665, + "step": 121 + }, + { + "epoch": 0.23, + "learning_rate": 0.0003997750196626785, + "loss": 0.5428, + "step": 122 + }, + { + "epoch": 0.24, + "learning_rate": 0.00039975410635659464, + "loss": 0.4365, + "step": 123 + }, + { + "epoch": 0.24, + "learning_rate": 0.00039973226434891995, + "loss": 0.5978, + "step": 124 + }, + { + "epoch": 0.24, + "learning_rate": 0.00039970949374120286, + "loss": 0.7729, + "step": 125 + }, + { + "epoch": 0.24, + "learning_rate": 0.000399685794639309, + "loss": 0.6212, + "step": 126 + }, + { + "epoch": 0.24, + "learning_rate": 0.00039966116715342066, + "loss": 0.5426, + "step": 127 + }, + { + "epoch": 0.25, + "learning_rate": 0.00039963561139803676, + "loss": 0.5782, + "step": 128 + }, + { + "epoch": 0.25, + "learning_rate": 0.0003996091274919716, + "loss": 0.6701, + "step": 129 + }, + { + "epoch": 0.25, + "learning_rate": 0.0003995817155583548, + "loss": 0.6314, + "step": 130 + }, + { + "epoch": 0.25, + "learning_rate": 0.0003995533757246307, + "loss": 0.6662, + "step": 131 + }, + { + "epoch": 0.25, + "learning_rate": 0.0003995241081225573, + "loss": 0.5192, + "step": 132 + }, + { + "epoch": 0.26, + "learning_rate": 0.0003994939128882065, + "loss": 0.5591, + "step": 133 + }, + { + "epoch": 0.26, + "learning_rate": 0.0003994627901619625, + "loss": 0.5809, + "step": 134 + }, + { + "epoch": 0.26, + "learning_rate": 0.0003994307400885219, + "loss": 0.4871, + "step": 135 + }, + { + "epoch": 0.26, + "learning_rate": 0.0003993977628168928, + "loss": 0.6666, + "step": 136 + }, + { + "epoch": 0.26, + "learning_rate": 0.0003993638585003938, + "loss": 0.6469, + "step": 137 + }, + { + "epoch": 0.27, + "learning_rate": 0.00039932902729665357, + "loss": 0.5727, + "step": 138 + }, + { + "epoch": 0.27, + "learning_rate": 0.00039929326936761036, + "loss": 0.6715, + "step": 139 + }, + { + "epoch": 0.27, + "learning_rate": 0.00039925658487951067, + "loss": 0.5686, + "step": 140 + }, + { + "epoch": 0.27, + "learning_rate": 0.00039921897400290894, + "loss": 0.501, + "step": 141 + }, + { + "epoch": 0.27, + "learning_rate": 0.00039918043691266665, + "loss": 0.5795, + "step": 142 + }, + { + "epoch": 0.28, + "learning_rate": 0.00039914097378795124, + "loss": 0.6287, + "step": 143 + }, + { + "epoch": 0.28, + "learning_rate": 0.00039910058481223564, + "loss": 0.7016, + "step": 144 + }, + { + "epoch": 0.28, + "learning_rate": 0.00039905927017329726, + "loss": 0.6232, + "step": 145 + }, + { + "epoch": 0.28, + "learning_rate": 0.00039901703006321715, + "loss": 0.5291, + "step": 146 + }, + { + "epoch": 0.28, + "learning_rate": 0.00039897386467837903, + "loss": 0.5297, + "step": 147 + }, + { + "epoch": 0.28, + "learning_rate": 0.00039892977421946844, + "loss": 0.5784, + "step": 148 + }, + { + "epoch": 0.29, + "learning_rate": 0.0003988847588914718, + "loss": 0.5714, + "step": 149 + }, + { + "epoch": 0.29, + "learning_rate": 0.0003988388189036754, + "loss": 0.5044, + "step": 150 + }, + { + "epoch": 0.29, + "learning_rate": 0.0003987919544696646, + "loss": 0.8246, + "step": 151 + }, + { + "epoch": 0.29, + "learning_rate": 0.0003987441658073226, + "loss": 0.5048, + "step": 152 + }, + { + "epoch": 0.29, + "learning_rate": 0.0003986954531388297, + "loss": 0.5433, + "step": 153 + }, + { + "epoch": 0.3, + "learning_rate": 0.00039864581669066186, + "loss": 0.5251, + "step": 154 + }, + { + "epoch": 0.3, + "learning_rate": 0.0003985952566935902, + "loss": 0.5708, + "step": 155 + }, + { + "epoch": 0.3, + "learning_rate": 0.00039854377338267936, + "loss": 0.6276, + "step": 156 + }, + { + "epoch": 0.3, + "learning_rate": 0.00039849136699728684, + "loss": 0.4915, + "step": 157 + }, + { + "epoch": 0.3, + "learning_rate": 0.0003984380377810617, + "loss": 0.6389, + "step": 158 + }, + { + "epoch": 0.31, + "learning_rate": 0.00039838378598194325, + "loss": 0.6067, + "step": 159 + }, + { + "epoch": 0.31, + "learning_rate": 0.00039832861185216045, + "loss": 0.6136, + "step": 160 + }, + { + "epoch": 0.31, + "learning_rate": 0.0003982725156482301, + "loss": 0.5597, + "step": 161 + }, + { + "epoch": 0.31, + "learning_rate": 0.000398215497630956, + "loss": 0.5957, + "step": 162 + }, + { + "epoch": 0.31, + "learning_rate": 0.0003981575580654278, + "loss": 0.5853, + "step": 163 + }, + { + "epoch": 0.32, + "learning_rate": 0.0003980986972210194, + "loss": 0.5462, + "step": 164 + }, + { + "epoch": 0.32, + "learning_rate": 0.0003980389153713881, + "loss": 0.5302, + "step": 165 + }, + { + "epoch": 0.32, + "learning_rate": 0.00039797821279447307, + "loss": 0.5395, + "step": 166 + }, + { + "epoch": 0.32, + "learning_rate": 0.00039791658977249425, + "loss": 0.7004, + "step": 167 + }, + { + "epoch": 0.32, + "learning_rate": 0.00039785404659195084, + "loss": 0.5622, + "step": 168 + }, + { + "epoch": 0.33, + "learning_rate": 0.00039779058354362013, + "loss": 0.5759, + "step": 169 + }, + { + "epoch": 0.33, + "learning_rate": 0.000397726200922556, + "loss": 0.6184, + "step": 170 + }, + { + "epoch": 0.33, + "learning_rate": 0.0003976608990280877, + "loss": 0.5488, + "step": 171 + }, + { + "epoch": 0.33, + "learning_rate": 0.0003975946781638183, + "loss": 0.6162, + "step": 172 + }, + { + "epoch": 0.33, + "learning_rate": 0.0003975275386376236, + "loss": 0.558, + "step": 173 + }, + { + "epoch": 0.34, + "learning_rate": 0.0003974594807616502, + "loss": 0.519, + "step": 174 + }, + { + "epoch": 0.34, + "learning_rate": 0.0003973905048523144, + "loss": 0.6195, + "step": 175 + }, + { + "epoch": 0.34, + "learning_rate": 0.00039732061123030064, + "loss": 0.5991, + "step": 176 + }, + { + "epoch": 0.34, + "learning_rate": 0.0003972498002205601, + "loss": 0.5428, + "step": 177 + }, + { + "epoch": 0.34, + "learning_rate": 0.00039717807215230896, + "loss": 0.5323, + "step": 178 + }, + { + "epoch": 0.34, + "learning_rate": 0.00039710542735902705, + "loss": 0.5307, + "step": 179 + }, + { + "epoch": 0.35, + "learning_rate": 0.0003970318661784564, + "loss": 0.5783, + "step": 180 + }, + { + "epoch": 0.35, + "learning_rate": 0.0003969573889525993, + "loss": 0.5924, + "step": 181 + }, + { + "epoch": 0.35, + "learning_rate": 0.00039688199602771714, + "loss": 0.5902, + "step": 182 + }, + { + "epoch": 0.35, + "learning_rate": 0.00039680568775432855, + "loss": 0.6291, + "step": 183 + }, + { + "epoch": 0.35, + "learning_rate": 0.0003967284644872077, + "loss": 0.5942, + "step": 184 + }, + { + "epoch": 0.36, + "learning_rate": 0.0003966503265853829, + "loss": 0.4878, + "step": 185 + }, + { + "epoch": 0.36, + "learning_rate": 0.0003965712744121347, + "loss": 0.6487, + "step": 186 + }, + { + "epoch": 0.36, + "learning_rate": 0.0003964913083349945, + "loss": 0.6111, + "step": 187 + }, + { + "epoch": 0.36, + "learning_rate": 0.00039641042872574233, + "loss": 0.6072, + "step": 188 + }, + { + "epoch": 0.36, + "learning_rate": 0.00039632863596040575, + "loss": 0.716, + "step": 189 + }, + { + "epoch": 0.37, + "learning_rate": 0.00039624593041925763, + "loss": 0.6178, + "step": 190 + }, + { + "epoch": 0.37, + "learning_rate": 0.0003961623124868145, + "loss": 0.6323, + "step": 191 + }, + { + "epoch": 0.37, + "learning_rate": 0.00039607778255183485, + "loss": 0.5821, + "step": 192 + }, + { + "epoch": 0.37, + "learning_rate": 0.0003959923410073174, + "loss": 0.6738, + "step": 193 + }, + { + "epoch": 0.37, + "learning_rate": 0.0003959059882504989, + "loss": 0.6203, + "step": 194 + }, + { + "epoch": 0.38, + "learning_rate": 0.00039581872468285277, + "loss": 0.632, + "step": 195 + }, + { + "epoch": 0.38, + "learning_rate": 0.0003957305507100868, + "loss": 0.5857, + "step": 196 + }, + { + "epoch": 0.38, + "learning_rate": 0.00039564146674214164, + "loss": 0.6311, + "step": 197 + }, + { + "epoch": 0.38, + "learning_rate": 0.0003955514731931885, + "loss": 0.5889, + "step": 198 + }, + { + "epoch": 0.38, + "learning_rate": 0.00039546057048162763, + "loss": 0.5201, + "step": 199 + }, + { + "epoch": 0.39, + "learning_rate": 0.00039536875903008607, + "loss": 0.5581, + "step": 200 + }, + { + "epoch": 0.39, + "learning_rate": 0.00039527603926541586, + "loss": 0.5104, + "step": 201 + }, + { + "epoch": 0.39, + "learning_rate": 0.00039518241161869193, + "loss": 0.5978, + "step": 202 + }, + { + "epoch": 0.39, + "learning_rate": 0.00039508787652521013, + "loss": 0.6244, + "step": 203 + }, + { + "epoch": 0.39, + "learning_rate": 0.00039499243442448536, + "loss": 0.589, + "step": 204 + }, + { + "epoch": 0.39, + "learning_rate": 0.0003948960857602493, + "loss": 0.575, + "step": 205 + }, + { + "epoch": 0.4, + "learning_rate": 0.0003947988309804485, + "loss": 0.5494, + "step": 206 + }, + { + "epoch": 0.4, + "learning_rate": 0.0003947006705372422, + "loss": 0.4895, + "step": 207 + }, + { + "epoch": 0.4, + "learning_rate": 0.00039460160488700036, + "loss": 0.5479, + "step": 208 + }, + { + "epoch": 0.4, + "learning_rate": 0.00039450163449030124, + "loss": 0.5893, + "step": 209 + }, + { + "epoch": 0.4, + "learning_rate": 0.0003944007598119297, + "loss": 0.5451, + "step": 210 + }, + { + "epoch": 0.41, + "learning_rate": 0.0003942989813208747, + "loss": 0.5582, + "step": 211 + }, + { + "epoch": 0.41, + "learning_rate": 0.0003941962994903273, + "loss": 0.5121, + "step": 212 + }, + { + "epoch": 0.41, + "learning_rate": 0.00039409271479767826, + "loss": 0.6324, + "step": 213 + }, + { + "epoch": 0.41, + "learning_rate": 0.000393988227724516, + "loss": 0.6118, + "step": 214 + }, + { + "epoch": 0.41, + "learning_rate": 0.0003938828387566244, + "loss": 0.6303, + "step": 215 + }, + { + "epoch": 0.42, + "learning_rate": 0.0003937765483839804, + "loss": 0.7705, + "step": 216 + }, + { + "epoch": 0.42, + "learning_rate": 0.0003936693571007517, + "loss": 0.6224, + "step": 217 + }, + { + "epoch": 0.42, + "learning_rate": 0.0003935612654052946, + "loss": 0.5664, + "step": 218 + }, + { + "epoch": 0.42, + "learning_rate": 0.00039345227380015163, + "loss": 0.66, + "step": 219 + }, + { + "epoch": 0.42, + "learning_rate": 0.00039334238279204906, + "loss": 0.5582, + "step": 220 + }, + { + "epoch": 0.43, + "learning_rate": 0.00039323159289189505, + "loss": 0.6087, + "step": 221 + }, + { + "epoch": 0.43, + "learning_rate": 0.0003931199046147764, + "loss": 0.5566, + "step": 222 + }, + { + "epoch": 0.43, + "learning_rate": 0.00039300731847995716, + "loss": 0.5775, + "step": 223 + }, + { + "epoch": 0.43, + "learning_rate": 0.00039289383501087534, + "loss": 0.5081, + "step": 224 + }, + { + "epoch": 0.43, + "learning_rate": 0.00039277945473514104, + "loss": 0.5218, + "step": 225 + }, + { + "epoch": 0.44, + "learning_rate": 0.0003926641781845338, + "loss": 0.6655, + "step": 226 + }, + { + "epoch": 0.44, + "learning_rate": 0.0003925480058950002, + "loss": 0.5735, + "step": 227 + }, + { + "epoch": 0.44, + "learning_rate": 0.00039243093840665114, + "loss": 0.6609, + "step": 228 + }, + { + "epoch": 0.44, + "learning_rate": 0.0003923129762637596, + "loss": 0.7323, + "step": 229 + }, + { + "epoch": 0.44, + "learning_rate": 0.000392194120014758, + "loss": 0.5703, + "step": 230 + }, + { + "epoch": 0.44, + "learning_rate": 0.00039207437021223583, + "loss": 0.6545, + "step": 231 + }, + { + "epoch": 0.45, + "learning_rate": 0.0003919537274129366, + "loss": 0.521, + "step": 232 + }, + { + "epoch": 0.45, + "learning_rate": 0.00039183219217775564, + "loss": 0.5257, + "step": 233 + }, + { + "epoch": 0.45, + "learning_rate": 0.0003917097650717377, + "loss": 0.5487, + "step": 234 + }, + { + "epoch": 0.45, + "learning_rate": 0.00039158644666407365, + "loss": 0.4861, + "step": 235 + }, + { + "epoch": 0.45, + "learning_rate": 0.00039146223752809845, + "loss": 0.4928, + "step": 236 + }, + { + "epoch": 0.46, + "learning_rate": 0.0003913371382412883, + "loss": 0.5253, + "step": 237 + }, + { + "epoch": 0.46, + "learning_rate": 0.00039121114938525756, + "loss": 0.6155, + "step": 238 + }, + { + "epoch": 0.46, + "learning_rate": 0.00039108427154575684, + "loss": 0.55, + "step": 239 + }, + { + "epoch": 0.46, + "learning_rate": 0.00039095650531266967, + "loss": 0.6617, + "step": 240 + }, + { + "epoch": 0.46, + "learning_rate": 0.00039082785128000976, + "loss": 0.5198, + "step": 241 + }, + { + "epoch": 0.47, + "learning_rate": 0.00039069831004591866, + "loss": 0.5302, + "step": 242 + }, + { + "epoch": 0.47, + "learning_rate": 0.0003905678822126625, + "loss": 0.5347, + "step": 243 + }, + { + "epoch": 0.47, + "learning_rate": 0.00039043656838662946, + "loss": 0.531, + "step": 244 + }, + { + "epoch": 0.47, + "learning_rate": 0.00039030436917832697, + "loss": 0.4884, + "step": 245 + }, + { + "epoch": 0.47, + "learning_rate": 0.00039017128520237883, + "loss": 0.6027, + "step": 246 + }, + { + "epoch": 0.48, + "learning_rate": 0.0003900373170775222, + "loss": 0.5537, + "step": 247 + }, + { + "epoch": 0.48, + "learning_rate": 0.00038990246542660494, + "loss": 0.5753, + "step": 248 + }, + { + "epoch": 0.48, + "learning_rate": 0.00038976673087658256, + "loss": 0.5059, + "step": 249 + }, + { + "epoch": 0.48, + "learning_rate": 0.00038963011405851537, + "loss": 0.5118, + "step": 250 + }, + { + "epoch": 0.48, + "learning_rate": 0.00038949261560756565, + "loss": 0.5645, + "step": 251 + }, + { + "epoch": 0.49, + "learning_rate": 0.0003893542361629944, + "loss": 0.5623, + "step": 252 + }, + { + "epoch": 0.49, + "learning_rate": 0.00038921497636815866, + "loss": 0.5216, + "step": 253 + }, + { + "epoch": 0.49, + "learning_rate": 0.0003890748368705085, + "loss": 0.4501, + "step": 254 + }, + { + "epoch": 0.49, + "learning_rate": 0.0003889338183215838, + "loss": 0.48, + "step": 255 + }, + { + "epoch": 0.49, + "learning_rate": 0.00038879192137701135, + "loss": 0.5218, + "step": 256 + }, + { + "epoch": 0.49, + "learning_rate": 0.0003886491466965018, + "loss": 0.5858, + "step": 257 + }, + { + "epoch": 0.5, + "learning_rate": 0.00038850549494384685, + "loss": 0.6124, + "step": 258 + }, + { + "epoch": 0.5, + "learning_rate": 0.00038836096678691536, + "loss": 0.4645, + "step": 259 + }, + { + "epoch": 0.5, + "learning_rate": 0.00038821556289765136, + "loss": 0.474, + "step": 260 + }, + { + "epoch": 0.5, + "learning_rate": 0.00038806928395207003, + "loss": 0.4364, + "step": 261 + }, + { + "epoch": 0.5, + "learning_rate": 0.00038792213063025484, + "loss": 0.5821, + "step": 262 + }, + { + "epoch": 0.51, + "learning_rate": 0.0003877741036163547, + "loss": 0.5393, + "step": 263 + }, + { + "epoch": 0.51, + "learning_rate": 0.0003876252035985804, + "loss": 0.5373, + "step": 264 + }, + { + "epoch": 0.51, + "learning_rate": 0.0003874754312692013, + "loss": 0.6021, + "step": 265 + }, + { + "epoch": 0.51, + "learning_rate": 0.0003873247873245426, + "loss": 0.4549, + "step": 266 + }, + { + "epoch": 0.51, + "learning_rate": 0.0003871732724649817, + "loss": 0.5994, + "step": 267 + }, + { + "epoch": 0.52, + "learning_rate": 0.0003870208873949453, + "loss": 0.4764, + "step": 268 + }, + { + "epoch": 0.52, + "learning_rate": 0.00038686763282290556, + "loss": 0.4311, + "step": 269 + }, + { + "epoch": 0.52, + "learning_rate": 0.0003867135094613774, + "loss": 0.5462, + "step": 270 + }, + { + "epoch": 0.52, + "learning_rate": 0.0003865585180269148, + "loss": 0.5006, + "step": 271 + }, + { + "epoch": 0.52, + "learning_rate": 0.0003864026592401076, + "loss": 0.5347, + "step": 272 + }, + { + "epoch": 0.53, + "learning_rate": 0.00038624593382557835, + "loss": 0.5242, + "step": 273 + }, + { + "epoch": 0.53, + "learning_rate": 0.00038608834251197856, + "loss": 0.5005, + "step": 274 + }, + { + "epoch": 0.53, + "learning_rate": 0.00038592988603198554, + "loss": 0.5436, + "step": 275 + }, + { + "epoch": 0.53, + "learning_rate": 0.000385770565122299, + "loss": 0.4658, + "step": 276 + }, + { + "epoch": 0.53, + "learning_rate": 0.0003856103805236375, + "loss": 0.5273, + "step": 277 + }, + { + "epoch": 0.54, + "learning_rate": 0.00038544933298073516, + "loss": 0.436, + "step": 278 + }, + { + "epoch": 0.54, + "learning_rate": 0.00038528742324233804, + "loss": 0.4785, + "step": 279 + }, + { + "epoch": 0.54, + "learning_rate": 0.00038512465206120086, + "loss": 0.5366, + "step": 280 + }, + { + "epoch": 0.54, + "learning_rate": 0.00038496102019408324, + "loss": 0.4448, + "step": 281 + }, + { + "epoch": 0.54, + "learning_rate": 0.00038479652840174637, + "loss": 0.5132, + "step": 282 + }, + { + "epoch": 0.54, + "learning_rate": 0.00038463117744894955, + "loss": 0.7918, + "step": 283 + }, + { + "epoch": 0.55, + "learning_rate": 0.00038446496810444627, + "loss": 0.5309, + "step": 284 + }, + { + "epoch": 0.55, + "learning_rate": 0.00038429790114098114, + "loss": 0.5316, + "step": 285 + }, + { + "epoch": 0.55, + "learning_rate": 0.00038412997733528576, + "loss": 0.4611, + "step": 286 + }, + { + "epoch": 0.55, + "learning_rate": 0.00038396119746807563, + "loss": 0.4609, + "step": 287 + }, + { + "epoch": 0.55, + "learning_rate": 0.00038379156232404613, + "loss": 0.5821, + "step": 288 + }, + { + "epoch": 0.56, + "learning_rate": 0.0003836210726918691, + "loss": 0.5883, + "step": 289 + }, + { + "epoch": 0.56, + "learning_rate": 0.0003834497293641889, + "loss": 0.5012, + "step": 290 + }, + { + "epoch": 0.56, + "learning_rate": 0.00038327753313761913, + "loss": 0.4457, + "step": 291 + }, + { + "epoch": 0.56, + "learning_rate": 0.00038310448481273867, + "loss": 0.4851, + "step": 292 + }, + { + "epoch": 0.56, + "learning_rate": 0.00038293058519408787, + "loss": 0.5622, + "step": 293 + }, + { + "epoch": 0.57, + "learning_rate": 0.00038275583509016507, + "loss": 0.5703, + "step": 294 + }, + { + "epoch": 0.57, + "learning_rate": 0.00038258023531342265, + "loss": 0.5718, + "step": 295 + }, + { + "epoch": 0.57, + "learning_rate": 0.0003824037866802632, + "loss": 0.5183, + "step": 296 + }, + { + "epoch": 0.57, + "learning_rate": 0.00038222649001103614, + "loss": 0.5085, + "step": 297 + }, + { + "epoch": 0.57, + "learning_rate": 0.00038204834613003323, + "loss": 0.5388, + "step": 298 + }, + { + "epoch": 0.58, + "learning_rate": 0.00038186935586548537, + "loss": 0.5425, + "step": 299 + }, + { + "epoch": 0.58, + "learning_rate": 0.0003816895200495584, + "loss": 0.447, + "step": 300 + }, + { + "epoch": 0.58, + "learning_rate": 0.0003815088395183493, + "loss": 0.5541, + "step": 301 + }, + { + "epoch": 0.58, + "learning_rate": 0.00038132731511188227, + "loss": 0.5518, + "step": 302 + }, + { + "epoch": 0.58, + "learning_rate": 0.000381144947674105, + "loss": 0.5074, + "step": 303 + }, + { + "epoch": 0.59, + "learning_rate": 0.0003809617380528847, + "loss": 0.5134, + "step": 304 + }, + { + "epoch": 0.59, + "learning_rate": 0.0003807776871000037, + "loss": 0.4599, + "step": 305 + }, + { + "epoch": 0.59, + "learning_rate": 0.0003805927956711562, + "loss": 0.5838, + "step": 306 + }, + { + "epoch": 0.59, + "learning_rate": 0.00038040706462594395, + "loss": 0.5216, + "step": 307 + }, + { + "epoch": 0.59, + "learning_rate": 0.00038022049482787216, + "loss": 0.5323, + "step": 308 + }, + { + "epoch": 0.6, + "learning_rate": 0.0003800330871443456, + "loss": 0.5681, + "step": 309 + }, + { + "epoch": 0.6, + "learning_rate": 0.00037984484244666446, + "loss": 0.4172, + "step": 310 + }, + { + "epoch": 0.6, + "learning_rate": 0.0003796557616100207, + "loss": 0.4958, + "step": 311 + }, + { + "epoch": 0.6, + "learning_rate": 0.0003794658455134934, + "loss": 0.662, + "step": 312 + }, + { + "epoch": 0.6, + "learning_rate": 0.0003792750950400451, + "loss": 0.5832, + "step": 313 + }, + { + "epoch": 0.6, + "learning_rate": 0.0003790835110765174, + "loss": 0.4271, + "step": 314 + }, + { + "epoch": 0.61, + "learning_rate": 0.0003788910945136271, + "loss": 0.4842, + "step": 315 + }, + { + "epoch": 0.61, + "learning_rate": 0.00037869784624596186, + "loss": 0.4656, + "step": 316 + }, + { + "epoch": 0.61, + "learning_rate": 0.00037850376717197626, + "loss": 0.4981, + "step": 317 + }, + { + "epoch": 0.61, + "learning_rate": 0.00037830885819398733, + "loss": 0.5162, + "step": 318 + }, + { + "epoch": 0.61, + "learning_rate": 0.00037811312021817067, + "loss": 0.652, + "step": 319 + }, + { + "epoch": 0.62, + "learning_rate": 0.0003779165541545558, + "loss": 0.5104, + "step": 320 + }, + { + "epoch": 0.62, + "learning_rate": 0.0003777191609170225, + "loss": 0.4971, + "step": 321 + }, + { + "epoch": 0.62, + "learning_rate": 0.0003775209414232962, + "loss": 0.4871, + "step": 322 + }, + { + "epoch": 0.62, + "learning_rate": 0.0003773218965949436, + "loss": 0.5226, + "step": 323 + }, + { + "epoch": 0.62, + "learning_rate": 0.00037712202735736884, + "loss": 0.4823, + "step": 324 + }, + { + "epoch": 0.63, + "learning_rate": 0.0003769213346398087, + "loss": 0.497, + "step": 325 + }, + { + "epoch": 0.63, + "learning_rate": 0.0003767198193753286, + "loss": 0.5976, + "step": 326 + }, + { + "epoch": 0.63, + "learning_rate": 0.0003765174825008181, + "loss": 0.4532, + "step": 327 + }, + { + "epoch": 0.63, + "learning_rate": 0.0003763143249569868, + "loss": 0.5236, + "step": 328 + }, + { + "epoch": 0.63, + "learning_rate": 0.00037611034768835947, + "loss": 0.6513, + "step": 329 + }, + { + "epoch": 0.64, + "learning_rate": 0.00037590555164327224, + "loss": 0.5686, + "step": 330 + }, + { + "epoch": 0.64, + "learning_rate": 0.00037569993777386774, + "loss": 0.456, + "step": 331 + }, + { + "epoch": 0.64, + "learning_rate": 0.0003754935070360909, + "loss": 0.5181, + "step": 332 + }, + { + "epoch": 0.64, + "learning_rate": 0.0003752862603896846, + "loss": 0.4765, + "step": 333 + }, + { + "epoch": 0.64, + "learning_rate": 0.00037507819879818477, + "loss": 0.5363, + "step": 334 + }, + { + "epoch": 0.65, + "learning_rate": 0.00037486932322891646, + "loss": 0.4584, + "step": 335 + }, + { + "epoch": 0.65, + "learning_rate": 0.00037465963465298886, + "loss": 0.5428, + "step": 336 + }, + { + "epoch": 0.65, + "learning_rate": 0.0003744491340452913, + "loss": 0.3927, + "step": 337 + }, + { + "epoch": 0.65, + "learning_rate": 0.0003742378223844882, + "loss": 0.5478, + "step": 338 + }, + { + "epoch": 0.65, + "learning_rate": 0.0003740257006530147, + "loss": 0.469, + "step": 339 + }, + { + "epoch": 0.65, + "learning_rate": 0.00037381276983707246, + "loss": 0.5169, + "step": 340 + }, + { + "epoch": 0.66, + "learning_rate": 0.00037359903092662434, + "loss": 0.4797, + "step": 341 + }, + { + "epoch": 0.66, + "learning_rate": 0.00037338448491539054, + "loss": 0.5315, + "step": 342 + }, + { + "epoch": 0.66, + "learning_rate": 0.00037316913280084353, + "loss": 0.4422, + "step": 343 + }, + { + "epoch": 0.66, + "learning_rate": 0.0003729529755842035, + "loss": 0.4426, + "step": 344 + }, + { + "epoch": 0.66, + "learning_rate": 0.0003727360142704337, + "loss": 0.4718, + "step": 345 + }, + { + "epoch": 0.67, + "learning_rate": 0.0003725182498682361, + "loss": 0.5585, + "step": 346 + }, + { + "epoch": 0.67, + "learning_rate": 0.0003722996833900459, + "loss": 0.4775, + "step": 347 + }, + { + "epoch": 0.67, + "learning_rate": 0.0003720803158520279, + "loss": 0.6014, + "step": 348 + }, + { + "epoch": 0.67, + "learning_rate": 0.00037186014827407076, + "loss": 0.5117, + "step": 349 + }, + { + "epoch": 0.67, + "learning_rate": 0.0003716391816797829, + "loss": 0.5404, + "step": 350 + }, + { + "epoch": 0.68, + "learning_rate": 0.0003714174170964876, + "loss": 0.527, + "step": 351 + }, + { + "epoch": 0.68, + "learning_rate": 0.00037119485555521796, + "loss": 0.4555, + "step": 352 + }, + { + "epoch": 0.68, + "learning_rate": 0.00037097149809071255, + "loss": 0.5372, + "step": 353 + }, + { + "epoch": 0.68, + "learning_rate": 0.00037074734574141016, + "loss": 0.5377, + "step": 354 + }, + { + "epoch": 0.68, + "learning_rate": 0.0003705223995494454, + "loss": 0.4925, + "step": 355 + }, + { + "epoch": 0.69, + "learning_rate": 0.00037029666056064345, + "loss": 0.482, + "step": 356 + }, + { + "epoch": 0.69, + "learning_rate": 0.00037007012982451546, + "loss": 0.5235, + "step": 357 + }, + { + "epoch": 0.69, + "learning_rate": 0.00036984280839425356, + "loss": 0.4957, + "step": 358 + }, + { + "epoch": 0.69, + "learning_rate": 0.000369614697326726, + "loss": 0.5379, + "step": 359 + }, + { + "epoch": 0.69, + "learning_rate": 0.0003693857976824721, + "loss": 0.4653, + "step": 360 + }, + { + "epoch": 0.7, + "learning_rate": 0.00036915611052569785, + "loss": 0.469, + "step": 361 + }, + { + "epoch": 0.7, + "learning_rate": 0.0003689256369242702, + "loss": 0.5618, + "step": 362 + }, + { + "epoch": 0.7, + "learning_rate": 0.0003686943779497124, + "loss": 0.4459, + "step": 363 + }, + { + "epoch": 0.7, + "learning_rate": 0.0003684623346771995, + "loss": 0.5606, + "step": 364 + }, + { + "epoch": 0.7, + "learning_rate": 0.0003682295081855524, + "loss": 0.4368, + "step": 365 + }, + { + "epoch": 0.7, + "learning_rate": 0.00036799589955723375, + "loss": 0.4168, + "step": 366 + }, + { + "epoch": 0.71, + "learning_rate": 0.00036776150987834243, + "loss": 0.4664, + "step": 367 + }, + { + "epoch": 0.71, + "learning_rate": 0.00036752634023860846, + "loss": 0.4737, + "step": 368 + }, + { + "epoch": 0.71, + "learning_rate": 0.0003672903917313883, + "loss": 0.4247, + "step": 369 + }, + { + "epoch": 0.71, + "learning_rate": 0.00036705366545365935, + "loss": 0.5677, + "step": 370 + }, + { + "epoch": 0.71, + "learning_rate": 0.00036681616250601505, + "loss": 0.5441, + "step": 371 + }, + { + "epoch": 0.72, + "learning_rate": 0.0003665778839926599, + "loss": 0.6247, + "step": 372 + }, + { + "epoch": 0.72, + "learning_rate": 0.00036633883102140405, + "loss": 0.5217, + "step": 373 + }, + { + "epoch": 0.72, + "learning_rate": 0.0003660990047036584, + "loss": 0.4651, + "step": 374 + }, + { + "epoch": 0.72, + "learning_rate": 0.0003658584061544291, + "loss": 0.4648, + "step": 375 + }, + { + "epoch": 0.72, + "learning_rate": 0.0003656170364923128, + "loss": 0.6048, + "step": 376 + }, + { + "epoch": 0.73, + "learning_rate": 0.00036537489683949114, + "loss": 0.4515, + "step": 377 + }, + { + "epoch": 0.73, + "learning_rate": 0.0003651319883217255, + "loss": 0.5096, + "step": 378 + }, + { + "epoch": 0.73, + "learning_rate": 0.00036488831206835207, + "loss": 0.4231, + "step": 379 + }, + { + "epoch": 0.73, + "learning_rate": 0.00036464386921227637, + "loss": 0.4903, + "step": 380 + }, + { + "epoch": 0.73, + "learning_rate": 0.00036439866088996796, + "loss": 0.5131, + "step": 381 + }, + { + "epoch": 0.74, + "learning_rate": 0.0003641526882414553, + "loss": 0.5986, + "step": 382 + }, + { + "epoch": 0.74, + "learning_rate": 0.0003639059524103203, + "loss": 0.6, + "step": 383 + }, + { + "epoch": 0.74, + "learning_rate": 0.0003636584545436931, + "loss": 0.5216, + "step": 384 + }, + { + "epoch": 0.74, + "learning_rate": 0.0003634101957922468, + "loss": 0.5144, + "step": 385 + }, + { + "epoch": 0.74, + "learning_rate": 0.00036316117731019184, + "loss": 0.4963, + "step": 386 + }, + { + "epoch": 0.75, + "learning_rate": 0.0003629114002552711, + "loss": 0.5657, + "step": 387 + }, + { + "epoch": 0.75, + "learning_rate": 0.00036266086578875384, + "loss": 0.5028, + "step": 388 + }, + { + "epoch": 0.75, + "learning_rate": 0.0003624095750754311, + "loss": 0.573, + "step": 389 + }, + { + "epoch": 0.75, + "learning_rate": 0.00036215752928360967, + "loss": 0.5199, + "step": 390 + }, + { + "epoch": 0.75, + "learning_rate": 0.0003619047295851068, + "loss": 0.656, + "step": 391 + }, + { + "epoch": 0.75, + "learning_rate": 0.00036165117715524506, + "loss": 0.5129, + "step": 392 + }, + { + "epoch": 0.76, + "learning_rate": 0.00036139687317284647, + "loss": 0.3945, + "step": 393 + }, + { + "epoch": 0.76, + "learning_rate": 0.0003611418188202271, + "loss": 0.5318, + "step": 394 + }, + { + "epoch": 0.76, + "learning_rate": 0.00036088601528319196, + "loss": 0.5344, + "step": 395 + }, + { + "epoch": 0.76, + "learning_rate": 0.00036062946375102885, + "loss": 0.5407, + "step": 396 + }, + { + "epoch": 0.76, + "learning_rate": 0.0003603721654165034, + "loss": 0.5364, + "step": 397 + }, + { + "epoch": 0.77, + "learning_rate": 0.00036011412147585306, + "loss": 0.5407, + "step": 398 + }, + { + "epoch": 0.77, + "learning_rate": 0.0003598553331287821, + "loss": 0.5999, + "step": 399 + }, + { + "epoch": 0.77, + "learning_rate": 0.0003595958015784555, + "loss": 0.624, + "step": 400 + }, + { + "epoch": 0.77, + "learning_rate": 0.00035933552803149354, + "loss": 0.5351, + "step": 401 + }, + { + "epoch": 0.77, + "learning_rate": 0.0003590745136979662, + "loss": 0.5196, + "step": 402 + }, + { + "epoch": 0.78, + "learning_rate": 0.00035881275979138765, + "loss": 0.5447, + "step": 403 + }, + { + "epoch": 0.78, + "learning_rate": 0.0003585502675287104, + "loss": 0.4908, + "step": 404 + }, + { + "epoch": 0.78, + "learning_rate": 0.00035828703813031986, + "loss": 0.5172, + "step": 405 + }, + { + "epoch": 0.78, + "learning_rate": 0.00035802307282002834, + "loss": 0.5923, + "step": 406 + }, + { + "epoch": 0.78, + "learning_rate": 0.0003577583728250699, + "loss": 0.568, + "step": 407 + }, + { + "epoch": 0.79, + "learning_rate": 0.00035749293937609395, + "loss": 0.4618, + "step": 408 + }, + { + "epoch": 0.79, + "learning_rate": 0.0003572267737071601, + "loss": 0.5351, + "step": 409 + }, + { + "epoch": 0.79, + "learning_rate": 0.0003569598770557322, + "loss": 0.5285, + "step": 410 + }, + { + "epoch": 0.79, + "learning_rate": 0.00035669225066267256, + "loss": 0.4571, + "step": 411 + }, + { + "epoch": 0.79, + "learning_rate": 0.00035642389577223625, + "loss": 0.4214, + "step": 412 + }, + { + "epoch": 0.8, + "learning_rate": 0.0003561548136320653, + "loss": 0.5393, + "step": 413 + }, + { + "epoch": 0.8, + "learning_rate": 0.0003558850054931828, + "loss": 0.549, + "step": 414 + }, + { + "epoch": 0.8, + "learning_rate": 0.00035561447260998714, + "loss": 0.4824, + "step": 415 + }, + { + "epoch": 0.8, + "learning_rate": 0.00035534321624024656, + "loss": 0.6244, + "step": 416 + }, + { + "epoch": 0.8, + "learning_rate": 0.00035507123764509245, + "loss": 0.5436, + "step": 417 + }, + { + "epoch": 0.8, + "learning_rate": 0.0003547985380890144, + "loss": 0.5198, + "step": 418 + }, + { + "epoch": 0.81, + "learning_rate": 0.00035452511883985366, + "loss": 0.5979, + "step": 419 + }, + { + "epoch": 0.81, + "learning_rate": 0.00035425098116879754, + "loss": 0.4158, + "step": 420 + }, + { + "epoch": 0.81, + "learning_rate": 0.00035397612635037356, + "loss": 0.5125, + "step": 421 + }, + { + "epoch": 0.81, + "learning_rate": 0.00035370055566244334, + "loss": 0.4699, + "step": 422 + }, + { + "epoch": 0.81, + "learning_rate": 0.0003534242703861966, + "loss": 0.5553, + "step": 423 + }, + { + "epoch": 0.82, + "learning_rate": 0.00035314727180614573, + "loss": 0.5969, + "step": 424 + }, + { + "epoch": 0.82, + "learning_rate": 0.00035286956121011897, + "loss": 0.456, + "step": 425 + }, + { + "epoch": 0.82, + "learning_rate": 0.0003525911398892552, + "loss": 0.5195, + "step": 426 + }, + { + "epoch": 0.82, + "learning_rate": 0.0003523120091379975, + "loss": 0.5187, + "step": 427 + }, + { + "epoch": 0.82, + "learning_rate": 0.00035203217025408726, + "loss": 0.5443, + "step": 428 + }, + { + "epoch": 0.83, + "learning_rate": 0.0003517516245385582, + "loss": 0.4476, + "step": 429 + }, + { + "epoch": 0.83, + "learning_rate": 0.0003514703732957301, + "loss": 0.5757, + "step": 430 + }, + { + "epoch": 0.83, + "learning_rate": 0.00035118841783320304, + "loss": 0.5129, + "step": 431 + }, + { + "epoch": 0.83, + "learning_rate": 0.00035090575946185114, + "loss": 0.6354, + "step": 432 + }, + { + "epoch": 0.83, + "learning_rate": 0.00035062239949581645, + "loss": 0.4065, + "step": 433 + }, + { + "epoch": 0.84, + "learning_rate": 0.000350338339252503, + "loss": 0.5472, + "step": 434 + }, + { + "epoch": 0.84, + "learning_rate": 0.00035005358005257045, + "loss": 0.5424, + "step": 435 + }, + { + "epoch": 0.84, + "learning_rate": 0.00034976812321992816, + "loss": 0.6127, + "step": 436 + }, + { + "epoch": 0.84, + "learning_rate": 0.00034948197008172877, + "loss": 0.63, + "step": 437 + }, + { + "epoch": 0.84, + "learning_rate": 0.0003491951219683625, + "loss": 0.413, + "step": 438 + }, + { + "epoch": 0.85, + "learning_rate": 0.00034890758021345034, + "loss": 0.5435, + "step": 439 + }, + { + "epoch": 0.85, + "learning_rate": 0.00034861934615383844, + "loss": 0.5433, + "step": 440 + }, + { + "epoch": 0.85, + "learning_rate": 0.00034833042112959153, + "loss": 0.4763, + "step": 441 + }, + { + "epoch": 0.85, + "learning_rate": 0.00034804080648398667, + "loss": 0.5727, + "step": 442 + }, + { + "epoch": 0.85, + "learning_rate": 0.00034775050356350727, + "loss": 0.5392, + "step": 443 + }, + { + "epoch": 0.85, + "learning_rate": 0.00034745951371783666, + "loss": 0.4981, + "step": 444 + }, + { + "epoch": 0.86, + "learning_rate": 0.0003471678382998518, + "loss": 0.5516, + "step": 445 + }, + { + "epoch": 0.86, + "learning_rate": 0.00034687547866561703, + "loss": 0.4965, + "step": 446 + }, + { + "epoch": 0.86, + "learning_rate": 0.0003465824361743779, + "loss": 0.4982, + "step": 447 + }, + { + "epoch": 0.86, + "learning_rate": 0.0003462887121885544, + "loss": 0.5619, + "step": 448 + }, + { + "epoch": 0.86, + "learning_rate": 0.0003459943080737353, + "loss": 0.5273, + "step": 449 + }, + { + "epoch": 0.87, + "learning_rate": 0.00034569922519867133, + "loss": 0.517, + "step": 450 + }, + { + "epoch": 0.87, + "learning_rate": 0.00034540346493526876, + "loss": 0.4874, + "step": 451 + }, + { + "epoch": 0.87, + "learning_rate": 0.0003451070286585833, + "loss": 0.5966, + "step": 452 + }, + { + "epoch": 0.87, + "learning_rate": 0.0003448099177468137, + "loss": 0.4487, + "step": 453 + }, + { + "epoch": 0.87, + "learning_rate": 0.0003445121335812951, + "loss": 0.5091, + "step": 454 + }, + { + "epoch": 0.88, + "learning_rate": 0.0003442136775464929, + "loss": 0.407, + "step": 455 + }, + { + "epoch": 0.88, + "learning_rate": 0.0003439145510299958, + "loss": 0.6327, + "step": 456 + }, + { + "epoch": 0.88, + "learning_rate": 0.00034361475542251025, + "loss": 0.4217, + "step": 457 + }, + { + "epoch": 0.88, + "learning_rate": 0.0003433142921178531, + "loss": 0.6102, + "step": 458 + }, + { + "epoch": 0.88, + "learning_rate": 0.0003430131625129456, + "loss": 0.5556, + "step": 459 + }, + { + "epoch": 0.89, + "learning_rate": 0.00034271136800780673, + "loss": 0.4986, + "step": 460 + }, + { + "epoch": 0.89, + "learning_rate": 0.0003424089100055467, + "loss": 0.5406, + "step": 461 + }, + { + "epoch": 0.89, + "learning_rate": 0.00034210578991236056, + "loss": 0.5881, + "step": 462 + }, + { + "epoch": 0.89, + "learning_rate": 0.00034180200913752157, + "loss": 0.4869, + "step": 463 + }, + { + "epoch": 0.89, + "learning_rate": 0.00034149756909337454, + "loss": 0.5626, + "step": 464 + }, + { + "epoch": 0.9, + "learning_rate": 0.0003411924711953295, + "loss": 0.564, + "step": 465 + }, + { + "epoch": 0.9, + "learning_rate": 0.00034088671686185486, + "loss": 0.6272, + "step": 466 + }, + { + "epoch": 0.9, + "learning_rate": 0.0003405803075144711, + "loss": 0.4643, + "step": 467 + }, + { + "epoch": 0.9, + "learning_rate": 0.0003402732445777438, + "loss": 0.5435, + "step": 468 + }, + { + "epoch": 0.9, + "learning_rate": 0.00033996552947927744, + "loss": 0.5844, + "step": 469 + }, + { + "epoch": 0.91, + "learning_rate": 0.0003396571636497084, + "loss": 0.5362, + "step": 470 + }, + { + "epoch": 0.91, + "learning_rate": 0.00033934814852269865, + "loss": 0.5607, + "step": 471 + }, + { + "epoch": 0.91, + "learning_rate": 0.0003390384855349285, + "loss": 0.4836, + "step": 472 + }, + { + "epoch": 0.91, + "learning_rate": 0.00033872817612609065, + "loss": 0.6555, + "step": 473 + }, + { + "epoch": 0.91, + "learning_rate": 0.00033841722173888315, + "loss": 0.4784, + "step": 474 + }, + { + "epoch": 0.91, + "learning_rate": 0.00033810562381900253, + "loss": 0.5583, + "step": 475 + }, + { + "epoch": 0.92, + "learning_rate": 0.00033779338381513736, + "loss": 0.4679, + "step": 476 + }, + { + "epoch": 0.92, + "learning_rate": 0.0003374805031789613, + "loss": 0.5325, + "step": 477 + }, + { + "epoch": 0.92, + "learning_rate": 0.00033716698336512654, + "loss": 0.6601, + "step": 478 + }, + { + "epoch": 0.92, + "learning_rate": 0.000336852825831257, + "loss": 0.4838, + "step": 479 + }, + { + "epoch": 0.92, + "learning_rate": 0.0003365380320379414, + "loss": 0.5588, + "step": 480 + }, + { + "epoch": 0.93, + "learning_rate": 0.00033622260344872665, + "loss": 0.4596, + "step": 481 + }, + { + "epoch": 0.93, + "learning_rate": 0.0003359065415301108, + "loss": 0.5228, + "step": 482 + }, + { + "epoch": 0.93, + "learning_rate": 0.00033558984775153663, + "loss": 0.5316, + "step": 483 + }, + { + "epoch": 0.93, + "learning_rate": 0.00033527252358538437, + "loss": 0.4761, + "step": 484 + }, + { + "epoch": 0.93, + "learning_rate": 0.0003349545705069653, + "loss": 0.5254, + "step": 485 + }, + { + "epoch": 0.94, + "learning_rate": 0.0003346359899945144, + "loss": 0.4786, + "step": 486 + }, + { + "epoch": 0.94, + "learning_rate": 0.00033431678352918384, + "loss": 0.4302, + "step": 487 + }, + { + "epoch": 0.94, + "learning_rate": 0.0003339969525950361, + "loss": 0.4914, + "step": 488 + }, + { + "epoch": 0.94, + "learning_rate": 0.00033367649867903663, + "loss": 0.4102, + "step": 489 + }, + { + "epoch": 0.94, + "learning_rate": 0.0003333554232710477, + "loss": 0.4698, + "step": 490 + }, + { + "epoch": 0.95, + "learning_rate": 0.0003330337278638207, + "loss": 0.4454, + "step": 491 + }, + { + "epoch": 0.95, + "learning_rate": 0.00033271141395298964, + "loss": 0.4648, + "step": 492 + }, + { + "epoch": 0.95, + "learning_rate": 0.00033238848303706415, + "loss": 0.4616, + "step": 493 + }, + { + "epoch": 0.95, + "learning_rate": 0.00033206493661742237, + "loss": 0.4861, + "step": 494 + }, + { + "epoch": 0.95, + "learning_rate": 0.00033174077619830416, + "loss": 0.4797, + "step": 495 + }, + { + "epoch": 0.96, + "learning_rate": 0.00033141600328680373, + "loss": 0.5104, + "step": 496 + }, + { + "epoch": 0.96, + "learning_rate": 0.00033109061939286336, + "loss": 0.5712, + "step": 497 + }, + { + "epoch": 0.96, + "learning_rate": 0.00033076462602926553, + "loss": 0.5425, + "step": 498 + }, + { + "epoch": 0.96, + "learning_rate": 0.00033043802471162636, + "loss": 0.6156, + "step": 499 + }, + { + "epoch": 0.96, + "learning_rate": 0.0003301108169583887, + "loss": 0.4282, + "step": 500 + }, + { + "epoch": 0.96, + "learning_rate": 0.0003297830042908146, + "loss": 0.4088, + "step": 501 + }, + { + "epoch": 0.97, + "learning_rate": 0.00032945458823297857, + "loss": 0.4866, + "step": 502 + }, + { + "epoch": 0.97, + "learning_rate": 0.0003291255703117605, + "loss": 0.5045, + "step": 503 + }, + { + "epoch": 0.97, + "learning_rate": 0.0003287959520568384, + "loss": 0.491, + "step": 504 + }, + { + "epoch": 0.97, + "learning_rate": 0.00032846573500068136, + "loss": 0.458, + "step": 505 + }, + { + "epoch": 0.97, + "learning_rate": 0.00032813492067854246, + "loss": 0.4508, + "step": 506 + }, + { + "epoch": 0.98, + "learning_rate": 0.0003278035106284516, + "loss": 0.4294, + "step": 507 + }, + { + "epoch": 0.98, + "learning_rate": 0.00032747150639120834, + "loss": 0.4834, + "step": 508 + }, + { + "epoch": 0.98, + "learning_rate": 0.00032713890951037477, + "loss": 0.3857, + "step": 509 + }, + { + "epoch": 0.98, + "learning_rate": 0.00032680572153226834, + "loss": 0.4072, + "step": 510 + }, + { + "epoch": 0.98, + "learning_rate": 0.0003264719440059545, + "loss": 0.4028, + "step": 511 + }, + { + "epoch": 0.99, + "learning_rate": 0.00032613757848323977, + "loss": 0.3789, + "step": 512 + }, + { + "epoch": 0.99, + "learning_rate": 0.00032580262651866446, + "loss": 0.4944, + "step": 513 + }, + { + "epoch": 0.99, + "learning_rate": 0.0003254670896694952, + "loss": 0.4259, + "step": 514 + }, + { + "epoch": 0.99, + "learning_rate": 0.00032513096949571805, + "loss": 0.5037, + "step": 515 + }, + { + "epoch": 0.99, + "learning_rate": 0.00032479426756003093, + "loss": 0.5857, + "step": 516 + }, + { + "epoch": 1.0, + "learning_rate": 0.0003244569854278366, + "loss": 0.5407, + "step": 517 + }, + { + "epoch": 1.0, + "learning_rate": 0.00032411912466723524, + "loss": 0.499, + "step": 518 + }, + { + "epoch": 1.0, + "learning_rate": 0.0003237806868490172, + "loss": 0.4359, + "step": 519 + }, + { + "epoch": 1.0, + "learning_rate": 0.00032344167354665573, + "loss": 0.4374, + "step": 520 + }, + { + "epoch": 1.0, + "learning_rate": 0.0003231020863362997, + "loss": 0.4172, + "step": 521 + }, + { + "epoch": 1.01, + "learning_rate": 0.000322761926796766, + "loss": 0.4451, + "step": 522 + }, + { + "epoch": 1.01, + "learning_rate": 0.0003224211965095326, + "loss": 0.4, + "step": 523 + }, + { + "epoch": 1.01, + "learning_rate": 0.0003220798970587309, + "loss": 0.4009, + "step": 524 + }, + { + "epoch": 1.01, + "learning_rate": 0.0003217380300311386, + "loss": 0.3966, + "step": 525 + }, + { + "epoch": 1.01, + "learning_rate": 0.000321395597016172, + "loss": 0.4255, + "step": 526 + }, + { + "epoch": 1.01, + "learning_rate": 0.00032105259960587895, + "loss": 0.4707, + "step": 527 + }, + { + "epoch": 1.02, + "learning_rate": 0.00032070903939493124, + "loss": 0.5313, + "step": 528 + }, + { + "epoch": 1.02, + "learning_rate": 0.0003203649179806172, + "loss": 0.3596, + "step": 529 + }, + { + "epoch": 1.02, + "learning_rate": 0.0003200202369628345, + "loss": 0.5223, + "step": 530 + }, + { + "epoch": 1.02, + "learning_rate": 0.00031967499794408234, + "loss": 0.4146, + "step": 531 + }, + { + "epoch": 1.02, + "learning_rate": 0.00031932920252945423, + "loss": 0.4328, + "step": 532 + }, + { + "epoch": 1.03, + "learning_rate": 0.0003189828523266306, + "loss": 0.4258, + "step": 533 + }, + { + "epoch": 1.03, + "learning_rate": 0.00031863594894587105, + "loss": 0.4457, + "step": 534 + }, + { + "epoch": 1.03, + "learning_rate": 0.0003182884940000072, + "loss": 0.5249, + "step": 535 + }, + { + "epoch": 1.03, + "learning_rate": 0.0003179404891044348, + "loss": 0.4751, + "step": 536 + }, + { + "epoch": 1.03, + "learning_rate": 0.00031759193587710676, + "loss": 0.5378, + "step": 537 + }, + { + "epoch": 1.04, + "learning_rate": 0.00031724283593852497, + "loss": 0.634, + "step": 538 + }, + { + "epoch": 1.04, + "learning_rate": 0.00031689319091173326, + "loss": 0.4298, + "step": 539 + }, + { + "epoch": 1.04, + "learning_rate": 0.00031654300242230977, + "loss": 0.5469, + "step": 540 + }, + { + "epoch": 1.04, + "learning_rate": 0.00031619227209835917, + "loss": 0.5153, + "step": 541 + }, + { + "epoch": 1.04, + "learning_rate": 0.0003158410015705053, + "loss": 0.4144, + "step": 542 + }, + { + "epoch": 1.05, + "learning_rate": 0.0003154891924718837, + "loss": 0.6041, + "step": 543 + }, + { + "epoch": 1.05, + "learning_rate": 0.0003151368464381335, + "loss": 0.4891, + "step": 544 + }, + { + "epoch": 1.05, + "learning_rate": 0.0003147839651073904, + "loss": 0.5258, + "step": 545 + }, + { + "epoch": 1.05, + "learning_rate": 0.00031443055012027874, + "loss": 0.4351, + "step": 546 + }, + { + "epoch": 1.05, + "learning_rate": 0.000314076603119904, + "loss": 0.4556, + "step": 547 + }, + { + "epoch": 1.06, + "learning_rate": 0.00031372212575184514, + "loss": 0.5445, + "step": 548 + }, + { + "epoch": 1.06, + "learning_rate": 0.00031336711966414675, + "loss": 0.5585, + "step": 549 + }, + { + "epoch": 1.06, + "learning_rate": 0.0003130115865073117, + "loss": 0.367, + "step": 550 + }, + { + "epoch": 1.06, + "learning_rate": 0.0003126555279342933, + "loss": 0.4877, + "step": 551 + }, + { + "epoch": 1.06, + "learning_rate": 0.0003122989456004876, + "loss": 0.4335, + "step": 552 + }, + { + "epoch": 1.07, + "learning_rate": 0.0003119418411637258, + "loss": 0.4383, + "step": 553 + }, + { + "epoch": 1.07, + "learning_rate": 0.0003115842162842663, + "loss": 0.4508, + "step": 554 + }, + { + "epoch": 1.07, + "learning_rate": 0.00031122607262478743, + "loss": 0.4631, + "step": 555 + }, + { + "epoch": 1.07, + "learning_rate": 0.0003108674118503793, + "loss": 0.3496, + "step": 556 + }, + { + "epoch": 1.07, + "learning_rate": 0.0003105082356285361, + "loss": 0.4108, + "step": 557 + }, + { + "epoch": 1.08, + "learning_rate": 0.0003101485456291486, + "loss": 0.4877, + "step": 558 + }, + { + "epoch": 1.08, + "learning_rate": 0.00030978834352449614, + "loss": 0.3696, + "step": 559 + }, + { + "epoch": 1.08, + "learning_rate": 0.00030942763098923913, + "loss": 0.5138, + "step": 560 + }, + { + "epoch": 1.08, + "learning_rate": 0.00030906640970041084, + "loss": 0.5961, + "step": 561 + }, + { + "epoch": 1.08, + "learning_rate": 0.0003087046813374099, + "loss": 0.3824, + "step": 562 + }, + { + "epoch": 1.08, + "learning_rate": 0.00030834244758199276, + "loss": 0.4925, + "step": 563 + }, + { + "epoch": 1.09, + "learning_rate": 0.000307979710118265, + "loss": 0.4511, + "step": 564 + }, + { + "epoch": 1.09, + "learning_rate": 0.00030761647063267457, + "loss": 0.4306, + "step": 565 + }, + { + "epoch": 1.09, + "learning_rate": 0.0003072527308140031, + "loss": 0.468, + "step": 566 + }, + { + "epoch": 1.09, + "learning_rate": 0.00030688849235335856, + "loss": 0.4842, + "step": 567 + }, + { + "epoch": 1.09, + "learning_rate": 0.0003065237569441671, + "loss": 0.4332, + "step": 568 + }, + { + "epoch": 1.1, + "learning_rate": 0.00030615852628216537, + "loss": 0.4637, + "step": 569 + }, + { + "epoch": 1.1, + "learning_rate": 0.0003057928020653925, + "loss": 0.6193, + "step": 570 + }, + { + "epoch": 1.1, + "learning_rate": 0.0003054265859941824, + "loss": 0.5033, + "step": 571 + }, + { + "epoch": 1.1, + "learning_rate": 0.00030505987977115555, + "loss": 0.4185, + "step": 572 + }, + { + "epoch": 1.1, + "learning_rate": 0.0003046926851012114, + "loss": 0.4211, + "step": 573 + }, + { + "epoch": 1.11, + "learning_rate": 0.0003043250036915201, + "loss": 0.5089, + "step": 574 + }, + { + "epoch": 1.11, + "learning_rate": 0.00030395683725151505, + "loss": 0.517, + "step": 575 + }, + { + "epoch": 1.11, + "learning_rate": 0.0003035881874928845, + "loss": 0.492, + "step": 576 + }, + { + "epoch": 1.11, + "learning_rate": 0.0003032190561295636, + "loss": 0.4535, + "step": 577 + }, + { + "epoch": 1.11, + "learning_rate": 0.0003028494448777269, + "loss": 0.3947, + "step": 578 + }, + { + "epoch": 1.12, + "learning_rate": 0.00030247935545577986, + "loss": 0.3125, + "step": 579 + }, + { + "epoch": 1.12, + "learning_rate": 0.0003021087895843511, + "loss": 0.3882, + "step": 580 + }, + { + "epoch": 1.12, + "learning_rate": 0.0003017377489862845, + "loss": 0.4802, + "step": 581 + }, + { + "epoch": 1.12, + "learning_rate": 0.00030136623538663083, + "loss": 0.4652, + "step": 582 + }, + { + "epoch": 1.12, + "learning_rate": 0.00030099425051263994, + "loss": 0.3816, + "step": 583 + }, + { + "epoch": 1.13, + "learning_rate": 0.0003006217960937529, + "loss": 0.4583, + "step": 584 + }, + { + "epoch": 1.13, + "learning_rate": 0.00030024887386159385, + "loss": 0.4568, + "step": 585 + }, + { + "epoch": 1.13, + "learning_rate": 0.00029987548554996174, + "loss": 0.3908, + "step": 586 + }, + { + "epoch": 1.13, + "learning_rate": 0.0002995016328948225, + "loss": 0.4235, + "step": 587 + }, + { + "epoch": 1.13, + "learning_rate": 0.00029912731763430075, + "loss": 0.4138, + "step": 588 + }, + { + "epoch": 1.13, + "learning_rate": 0.00029875254150867216, + "loss": 0.5838, + "step": 589 + }, + { + "epoch": 1.14, + "learning_rate": 0.0002983773062603548, + "loss": 0.462, + "step": 590 + }, + { + "epoch": 1.14, + "learning_rate": 0.00029800161363390145, + "loss": 0.4632, + "step": 591 + }, + { + "epoch": 1.14, + "learning_rate": 0.00029762546537599125, + "loss": 0.5898, + "step": 592 + }, + { + "epoch": 1.14, + "learning_rate": 0.0002972488632354218, + "loss": 0.4742, + "step": 593 + }, + { + "epoch": 1.14, + "learning_rate": 0.00029687180896310065, + "loss": 0.4579, + "step": 594 + }, + { + "epoch": 1.15, + "learning_rate": 0.0002964943043120378, + "loss": 0.5514, + "step": 595 + }, + { + "epoch": 1.15, + "learning_rate": 0.00029611635103733675, + "loss": 0.4304, + "step": 596 + }, + { + "epoch": 1.15, + "learning_rate": 0.0002957379508961871, + "loss": 0.4383, + "step": 597 + }, + { + "epoch": 1.15, + "learning_rate": 0.00029535910564785584, + "loss": 0.5327, + "step": 598 + }, + { + "epoch": 1.15, + "learning_rate": 0.00029497981705367933, + "loss": 0.4781, + "step": 599 + }, + { + "epoch": 1.16, + "learning_rate": 0.00029460008687705525, + "loss": 0.4178, + "step": 600 + }, + { + "epoch": 1.16, + "learning_rate": 0.0002942199168834342, + "loss": 0.3987, + "step": 601 + }, + { + "epoch": 1.16, + "learning_rate": 0.00029383930884031183, + "loss": 0.3861, + "step": 602 + }, + { + "epoch": 1.16, + "learning_rate": 0.00029345826451722005, + "loss": 0.5322, + "step": 603 + }, + { + "epoch": 1.16, + "learning_rate": 0.00029307678568571936, + "loss": 0.3997, + "step": 604 + }, + { + "epoch": 1.17, + "learning_rate": 0.0002926948741193903, + "loss": 0.4121, + "step": 605 + }, + { + "epoch": 1.17, + "learning_rate": 0.00029231253159382514, + "loss": 0.4931, + "step": 606 + }, + { + "epoch": 1.17, + "learning_rate": 0.00029192975988662017, + "loss": 0.4626, + "step": 607 + }, + { + "epoch": 1.17, + "learning_rate": 0.00029154656077736666, + "loss": 0.4441, + "step": 608 + }, + { + "epoch": 1.17, + "learning_rate": 0.0002911629360476432, + "loss": 0.3863, + "step": 609 + }, + { + "epoch": 1.18, + "learning_rate": 0.00029077888748100703, + "loss": 0.36, + "step": 610 + }, + { + "epoch": 1.18, + "learning_rate": 0.00029039441686298594, + "loss": 0.4246, + "step": 611 + }, + { + "epoch": 1.18, + "learning_rate": 0.0002900095259810702, + "loss": 0.4916, + "step": 612 + }, + { + "epoch": 1.18, + "learning_rate": 0.00028962421662470346, + "loss": 0.4896, + "step": 613 + }, + { + "epoch": 1.18, + "learning_rate": 0.00028923849058527535, + "loss": 0.4237, + "step": 614 + }, + { + "epoch": 1.18, + "learning_rate": 0.00028885234965611274, + "loss": 0.5727, + "step": 615 + }, + { + "epoch": 1.19, + "learning_rate": 0.00028846579563247116, + "loss": 0.5681, + "step": 616 + }, + { + "epoch": 1.19, + "learning_rate": 0.0002880788303115269, + "loss": 0.4383, + "step": 617 + }, + { + "epoch": 1.19, + "learning_rate": 0.00028769145549236845, + "loss": 0.4962, + "step": 618 + }, + { + "epoch": 1.19, + "learning_rate": 0.0002873036729759881, + "loss": 0.5472, + "step": 619 + }, + { + "epoch": 1.19, + "learning_rate": 0.0002869154845652738, + "loss": 0.5431, + "step": 620 + }, + { + "epoch": 1.2, + "learning_rate": 0.0002865268920650003, + "loss": 0.4152, + "step": 621 + }, + { + "epoch": 1.2, + "learning_rate": 0.0002861378972818211, + "loss": 0.3922, + "step": 622 + }, + { + "epoch": 1.2, + "learning_rate": 0.0002857485020242602, + "loss": 0.5129, + "step": 623 + }, + { + "epoch": 1.2, + "learning_rate": 0.0002853587081027034, + "loss": 0.4328, + "step": 624 + }, + { + "epoch": 1.2, + "learning_rate": 0.00028496851732938997, + "loss": 0.4431, + "step": 625 + }, + { + "epoch": 1.21, + "learning_rate": 0.0002845779315184042, + "loss": 0.4968, + "step": 626 + }, + { + "epoch": 1.21, + "learning_rate": 0.000284186952485667, + "loss": 0.5301, + "step": 627 + }, + { + "epoch": 1.21, + "learning_rate": 0.0002837955820489276, + "loss": 0.4332, + "step": 628 + }, + { + "epoch": 1.21, + "learning_rate": 0.0002834038220277546, + "loss": 0.4245, + "step": 629 + }, + { + "epoch": 1.21, + "learning_rate": 0.00028301167424352836, + "loss": 0.5057, + "step": 630 + }, + { + "epoch": 1.22, + "learning_rate": 0.00028261914051943166, + "loss": 0.4623, + "step": 631 + }, + { + "epoch": 1.22, + "learning_rate": 0.00028222622268044174, + "loss": 0.5452, + "step": 632 + }, + { + "epoch": 1.22, + "learning_rate": 0.00028183292255332164, + "loss": 0.5238, + "step": 633 + }, + { + "epoch": 1.22, + "learning_rate": 0.00028143924196661176, + "loss": 0.3966, + "step": 634 + }, + { + "epoch": 1.22, + "learning_rate": 0.0002810451827506214, + "loss": 0.35, + "step": 635 + }, + { + "epoch": 1.23, + "learning_rate": 0.00028065074673742007, + "loss": 0.4325, + "step": 636 + }, + { + "epoch": 1.23, + "learning_rate": 0.0002802559357608292, + "loss": 0.4854, + "step": 637 + }, + { + "epoch": 1.23, + "learning_rate": 0.00027986075165641343, + "loss": 0.4254, + "step": 638 + }, + { + "epoch": 1.23, + "learning_rate": 0.00027946519626147225, + "loss": 0.4614, + "step": 639 + }, + { + "epoch": 1.23, + "learning_rate": 0.00027906927141503125, + "loss": 0.3798, + "step": 640 + }, + { + "epoch": 1.23, + "learning_rate": 0.00027867297895783373, + "loss": 0.4742, + "step": 641 + }, + { + "epoch": 1.24, + "learning_rate": 0.0002782763207323322, + "loss": 0.4007, + "step": 642 + }, + { + "epoch": 1.24, + "learning_rate": 0.0002778792985826795, + "loss": 0.4383, + "step": 643 + }, + { + "epoch": 1.24, + "learning_rate": 0.0002774819143547206, + "loss": 0.4298, + "step": 644 + }, + { + "epoch": 1.24, + "learning_rate": 0.00027708416989598387, + "loss": 0.5178, + "step": 645 + }, + { + "epoch": 1.24, + "learning_rate": 0.0002766860670556722, + "loss": 0.3434, + "step": 646 + }, + { + "epoch": 1.25, + "learning_rate": 0.0002762876076846551, + "loss": 0.3862, + "step": 647 + }, + { + "epoch": 1.25, + "learning_rate": 0.00027588879363545934, + "loss": 0.4459, + "step": 648 + }, + { + "epoch": 1.25, + "learning_rate": 0.0002754896267622608, + "loss": 0.3934, + "step": 649 + }, + { + "epoch": 1.25, + "learning_rate": 0.00027509010892087565, + "loss": 0.4349, + "step": 650 + }, + { + "epoch": 1.25, + "learning_rate": 0.000274690241968752, + "loss": 0.4178, + "step": 651 + }, + { + "epoch": 1.26, + "learning_rate": 0.0002742900277649607, + "loss": 0.4151, + "step": 652 + }, + { + "epoch": 1.26, + "learning_rate": 0.0002738894681701874, + "loss": 0.3888, + "step": 653 + }, + { + "epoch": 1.26, + "learning_rate": 0.00027348856504672323, + "loss": 0.4214, + "step": 654 + }, + { + "epoch": 1.26, + "learning_rate": 0.0002730873202584567, + "loss": 0.519, + "step": 655 + }, + { + "epoch": 1.26, + "learning_rate": 0.00027268573567086477, + "loss": 0.5463, + "step": 656 + }, + { + "epoch": 1.27, + "learning_rate": 0.00027228381315100417, + "loss": 0.3367, + "step": 657 + }, + { + "epoch": 1.27, + "learning_rate": 0.00027188155456750256, + "loss": 0.4629, + "step": 658 + }, + { + "epoch": 1.27, + "learning_rate": 0.00027147896179055043, + "loss": 0.4456, + "step": 659 + }, + { + "epoch": 1.27, + "learning_rate": 0.0002710760366918917, + "loss": 0.4348, + "step": 660 + }, + { + "epoch": 1.27, + "learning_rate": 0.0002706727811448153, + "loss": 0.4505, + "step": 661 + }, + { + "epoch": 1.28, + "learning_rate": 0.0002702691970241468, + "loss": 0.5028, + "step": 662 + }, + { + "epoch": 1.28, + "learning_rate": 0.00026986528620623904, + "loss": 0.5257, + "step": 663 + }, + { + "epoch": 1.28, + "learning_rate": 0.00026946105056896403, + "loss": 0.4977, + "step": 664 + }, + { + "epoch": 1.28, + "learning_rate": 0.00026905649199170377, + "loss": 0.421, + "step": 665 + }, + { + "epoch": 1.28, + "learning_rate": 0.0002686516123553417, + "loss": 0.4931, + "step": 666 + }, + { + "epoch": 1.28, + "learning_rate": 0.00026824641354225397, + "loss": 0.5818, + "step": 667 + }, + { + "epoch": 1.29, + "learning_rate": 0.0002678408974363005, + "loss": 0.4211, + "step": 668 + }, + { + "epoch": 1.29, + "learning_rate": 0.00026743506592281674, + "loss": 0.5182, + "step": 669 + }, + { + "epoch": 1.29, + "learning_rate": 0.00026702892088860413, + "loss": 0.5591, + "step": 670 + }, + { + "epoch": 1.29, + "learning_rate": 0.0002666224642219221, + "loss": 0.5363, + "step": 671 + }, + { + "epoch": 1.29, + "learning_rate": 0.0002662156978124786, + "loss": 0.5866, + "step": 672 + }, + { + "epoch": 1.3, + "learning_rate": 0.0002658086235514218, + "loss": 0.422, + "step": 673 + }, + { + "epoch": 1.3, + "learning_rate": 0.0002654012433313312, + "loss": 0.5375, + "step": 674 + }, + { + "epoch": 1.3, + "learning_rate": 0.0002649935590462087, + "loss": 0.4752, + "step": 675 + }, + { + "epoch": 1.3, + "learning_rate": 0.00026458557259146986, + "loss": 0.4271, + "step": 676 + }, + { + "epoch": 1.3, + "learning_rate": 0.0002641772858639351, + "loss": 0.4843, + "step": 677 + }, + { + "epoch": 1.31, + "learning_rate": 0.00026376870076182086, + "loss": 0.4827, + "step": 678 + }, + { + "epoch": 1.31, + "learning_rate": 0.00026335981918473086, + "loss": 0.47, + "step": 679 + }, + { + "epoch": 1.31, + "learning_rate": 0.0002629506430336472, + "loss": 0.368, + "step": 680 + }, + { + "epoch": 1.31, + "learning_rate": 0.00026254117421092133, + "loss": 0.481, + "step": 681 + }, + { + "epoch": 1.31, + "learning_rate": 0.0002621314146202656, + "loss": 0.4153, + "step": 682 + }, + { + "epoch": 1.32, + "learning_rate": 0.0002617213661667443, + "loss": 0.4397, + "step": 683 + }, + { + "epoch": 1.32, + "learning_rate": 0.0002613110307567643, + "loss": 0.4052, + "step": 684 + }, + { + "epoch": 1.32, + "learning_rate": 0.00026090041029806695, + "loss": 0.4652, + "step": 685 + }, + { + "epoch": 1.32, + "learning_rate": 0.00026048950669971884, + "loss": 0.3826, + "step": 686 + }, + { + "epoch": 1.32, + "learning_rate": 0.00026007832187210277, + "loss": 0.5639, + "step": 687 + }, + { + "epoch": 1.33, + "learning_rate": 0.00025966685772690906, + "loss": 0.3917, + "step": 688 + }, + { + "epoch": 1.33, + "learning_rate": 0.00025925511617712685, + "loss": 0.5248, + "step": 689 + }, + { + "epoch": 1.33, + "learning_rate": 0.0002588430991370347, + "loss": 0.3796, + "step": 690 + }, + { + "epoch": 1.33, + "learning_rate": 0.0002584308085221922, + "loss": 0.4391, + "step": 691 + }, + { + "epoch": 1.33, + "learning_rate": 0.00025801824624943084, + "loss": 0.4514, + "step": 692 + }, + { + "epoch": 1.34, + "learning_rate": 0.00025760541423684496, + "loss": 0.5046, + "step": 693 + }, + { + "epoch": 1.34, + "learning_rate": 0.0002571923144037831, + "loss": 0.4578, + "step": 694 + }, + { + "epoch": 1.34, + "learning_rate": 0.0002567789486708389, + "loss": 0.4681, + "step": 695 + }, + { + "epoch": 1.34, + "learning_rate": 0.00025636531895984236, + "loss": 0.4501, + "step": 696 + }, + { + "epoch": 1.34, + "learning_rate": 0.0002559514271938506, + "loss": 0.4411, + "step": 697 + }, + { + "epoch": 1.34, + "learning_rate": 0.00025553727529713916, + "loss": 0.401, + "step": 698 + }, + { + "epoch": 1.35, + "learning_rate": 0.00025512286519519293, + "loss": 0.4911, + "step": 699 + }, + { + "epoch": 1.35, + "learning_rate": 0.0002547081988146974, + "loss": 0.3754, + "step": 700 + }, + { + "epoch": 1.35, + "learning_rate": 0.00025429327808352946, + "loss": 0.3807, + "step": 701 + }, + { + "epoch": 1.35, + "learning_rate": 0.0002538781049307486, + "loss": 0.4193, + "step": 702 + }, + { + "epoch": 1.35, + "learning_rate": 0.0002534626812865876, + "loss": 0.5259, + "step": 703 + }, + { + "epoch": 1.36, + "learning_rate": 0.00025304700908244433, + "loss": 0.3684, + "step": 704 + }, + { + "epoch": 1.36, + "learning_rate": 0.0002526310902508718, + "loss": 0.5423, + "step": 705 + }, + { + "epoch": 1.36, + "learning_rate": 0.0002522149267255699, + "loss": 0.4288, + "step": 706 + }, + { + "epoch": 1.36, + "learning_rate": 0.000251798520441376, + "loss": 0.5046, + "step": 707 + }, + { + "epoch": 1.36, + "learning_rate": 0.0002513818733342564, + "loss": 0.3777, + "step": 708 + }, + { + "epoch": 1.37, + "learning_rate": 0.00025096498734129667, + "loss": 0.5171, + "step": 709 + }, + { + "epoch": 1.37, + "learning_rate": 0.0002505478644006932, + "loss": 0.3785, + "step": 710 + }, + { + "epoch": 1.37, + "learning_rate": 0.00025013050645174414, + "loss": 0.5413, + "step": 711 + }, + { + "epoch": 1.37, + "learning_rate": 0.00024971291543483994, + "loss": 0.5018, + "step": 712 + }, + { + "epoch": 1.37, + "learning_rate": 0.00024929509329145477, + "loss": 0.5212, + "step": 713 + }, + { + "epoch": 1.38, + "learning_rate": 0.00024887704196413746, + "loss": 0.483, + "step": 714 + }, + { + "epoch": 1.38, + "learning_rate": 0.0002484587633965023, + "loss": 0.3684, + "step": 715 + }, + { + "epoch": 1.38, + "learning_rate": 0.00024804025953322005, + "loss": 0.3782, + "step": 716 + }, + { + "epoch": 1.38, + "learning_rate": 0.00024762153232000877, + "loss": 0.4995, + "step": 717 + }, + { + "epoch": 1.38, + "learning_rate": 0.0002472025837036253, + "loss": 0.4324, + "step": 718 + }, + { + "epoch": 1.39, + "learning_rate": 0.0002467834156318555, + "loss": 0.5203, + "step": 719 + }, + { + "epoch": 1.39, + "learning_rate": 0.0002463640300535057, + "loss": 0.423, + "step": 720 + }, + { + "epoch": 1.39, + "learning_rate": 0.0002459444289183933, + "loss": 0.4537, + "step": 721 + }, + { + "epoch": 1.39, + "learning_rate": 0.00024552461417733817, + "loss": 0.4124, + "step": 722 + }, + { + "epoch": 1.39, + "learning_rate": 0.0002451045877821528, + "loss": 0.4865, + "step": 723 + }, + { + "epoch": 1.39, + "learning_rate": 0.0002446843516856343, + "loss": 0.4845, + "step": 724 + }, + { + "epoch": 1.4, + "learning_rate": 0.00024426390784155425, + "loss": 0.4174, + "step": 725 + }, + { + "epoch": 1.4, + "learning_rate": 0.00024384325820465033, + "loss": 0.4456, + "step": 726 + }, + { + "epoch": 1.4, + "learning_rate": 0.0002434224047306169, + "loss": 0.4429, + "step": 727 + }, + { + "epoch": 1.4, + "learning_rate": 0.0002430013493760961, + "loss": 0.363, + "step": 728 + }, + { + "epoch": 1.4, + "learning_rate": 0.00024258009409866853, + "loss": 0.4769, + "step": 729 + }, + { + "epoch": 1.41, + "learning_rate": 0.00024215864085684442, + "loss": 0.4597, + "step": 730 + }, + { + "epoch": 1.41, + "learning_rate": 0.00024173699161005429, + "loss": 0.366, + "step": 731 + }, + { + "epoch": 1.41, + "learning_rate": 0.00024131514831863995, + "loss": 0.4746, + "step": 732 + }, + { + "epoch": 1.41, + "learning_rate": 0.0002408931129438453, + "loss": 0.5608, + "step": 733 + }, + { + "epoch": 1.41, + "learning_rate": 0.00024047088744780744, + "loss": 0.4292, + "step": 734 + }, + { + "epoch": 1.42, + "learning_rate": 0.00024004847379354726, + "loss": 0.4743, + "step": 735 + }, + { + "epoch": 1.42, + "learning_rate": 0.00023962587394496038, + "loss": 0.3855, + "step": 736 + }, + { + "epoch": 1.42, + "learning_rate": 0.00023920308986680834, + "loss": 0.4573, + "step": 737 + }, + { + "epoch": 1.42, + "learning_rate": 0.00023878012352470892, + "loss": 0.3937, + "step": 738 + }, + { + "epoch": 1.42, + "learning_rate": 0.0002383569768851274, + "loss": 0.4371, + "step": 739 + }, + { + "epoch": 1.43, + "learning_rate": 0.00023793365191536735, + "loss": 0.5432, + "step": 740 + }, + { + "epoch": 1.43, + "learning_rate": 0.00023751015058356135, + "loss": 0.4803, + "step": 741 + }, + { + "epoch": 1.43, + "learning_rate": 0.000237086474858662, + "loss": 0.4281, + "step": 742 + }, + { + "epoch": 1.43, + "learning_rate": 0.00023666262671043263, + "loss": 0.4031, + "step": 743 + }, + { + "epoch": 1.43, + "learning_rate": 0.00023623860810943826, + "loss": 0.4725, + "step": 744 + }, + { + "epoch": 1.44, + "learning_rate": 0.0002358144210270364, + "loss": 0.4644, + "step": 745 + }, + { + "epoch": 1.44, + "learning_rate": 0.00023539006743536774, + "loss": 0.4848, + "step": 746 + }, + { + "epoch": 1.44, + "learning_rate": 0.00023496554930734718, + "loss": 0.4084, + "step": 747 + }, + { + "epoch": 1.44, + "learning_rate": 0.00023454086861665472, + "loss": 0.4322, + "step": 748 + }, + { + "epoch": 1.44, + "learning_rate": 0.00023411602733772595, + "loss": 0.4847, + "step": 749 + }, + { + "epoch": 1.44, + "learning_rate": 0.00023369102744574312, + "loss": 0.4298, + "step": 750 + }, + { + "epoch": 1.45, + "learning_rate": 0.00023326587091662603, + "loss": 0.4268, + "step": 751 + }, + { + "epoch": 1.45, + "learning_rate": 0.00023284055972702254, + "loss": 0.4089, + "step": 752 + }, + { + "epoch": 1.45, + "learning_rate": 0.0002324150958542997, + "loss": 0.4214, + "step": 753 + }, + { + "epoch": 1.45, + "learning_rate": 0.00023198948127653446, + "loss": 0.5576, + "step": 754 + }, + { + "epoch": 1.45, + "learning_rate": 0.00023156371797250418, + "loss": 0.4377, + "step": 755 + }, + { + "epoch": 1.46, + "learning_rate": 0.00023113780792167785, + "loss": 0.4934, + "step": 756 + }, + { + "epoch": 1.46, + "learning_rate": 0.0002307117531042068, + "loss": 0.3698, + "step": 757 + }, + { + "epoch": 1.46, + "learning_rate": 0.00023028555550091536, + "loss": 0.4722, + "step": 758 + }, + { + "epoch": 1.46, + "learning_rate": 0.00022985921709329157, + "loss": 0.3837, + "step": 759 + }, + { + "epoch": 1.46, + "learning_rate": 0.00022943273986347822, + "loss": 0.5132, + "step": 760 + }, + { + "epoch": 1.47, + "learning_rate": 0.0002290061257942635, + "loss": 0.487, + "step": 761 + }, + { + "epoch": 1.47, + "learning_rate": 0.00022857937686907183, + "loss": 0.3857, + "step": 762 + }, + { + "epoch": 1.47, + "learning_rate": 0.00022815249507195445, + "loss": 0.4135, + "step": 763 + }, + { + "epoch": 1.47, + "learning_rate": 0.00022772548238758064, + "loss": 0.4639, + "step": 764 + }, + { + "epoch": 1.47, + "learning_rate": 0.00022729834080122791, + "loss": 0.5297, + "step": 765 + }, + { + "epoch": 1.48, + "learning_rate": 0.00022687107229877324, + "loss": 0.4485, + "step": 766 + }, + { + "epoch": 1.48, + "learning_rate": 0.00022644367886668357, + "loss": 0.467, + "step": 767 + }, + { + "epoch": 1.48, + "learning_rate": 0.00022601616249200675, + "loss": 0.4304, + "step": 768 + }, + { + "epoch": 1.48, + "learning_rate": 0.00022558852516236217, + "loss": 0.5531, + "step": 769 + }, + { + "epoch": 1.48, + "learning_rate": 0.00022516076886593158, + "loss": 0.5021, + "step": 770 + }, + { + "epoch": 1.49, + "learning_rate": 0.00022473289559144988, + "loss": 0.433, + "step": 771 + }, + { + "epoch": 1.49, + "learning_rate": 0.00022430490732819566, + "loss": 0.505, + "step": 772 + }, + { + "epoch": 1.49, + "learning_rate": 0.00022387680606598235, + "loss": 0.4677, + "step": 773 + }, + { + "epoch": 1.49, + "learning_rate": 0.00022344859379514858, + "loss": 0.4421, + "step": 774 + }, + { + "epoch": 1.49, + "learning_rate": 0.00022302027250654905, + "loss": 0.4282, + "step": 775 + }, + { + "epoch": 1.49, + "learning_rate": 0.0002225918441915456, + "loss": 0.366, + "step": 776 + }, + { + "epoch": 1.5, + "learning_rate": 0.00022216331084199724, + "loss": 0.4147, + "step": 777 + }, + { + "epoch": 1.5, + "learning_rate": 0.00022173467445025158, + "loss": 0.586, + "step": 778 + }, + { + "epoch": 1.5, + "learning_rate": 0.00022130593700913522, + "loss": 0.5285, + "step": 779 + }, + { + "epoch": 1.5, + "learning_rate": 0.00022087710051194463, + "loss": 0.4484, + "step": 780 + }, + { + "epoch": 1.5, + "learning_rate": 0.0002204481669524367, + "loss": 0.4063, + "step": 781 + }, + { + "epoch": 1.51, + "learning_rate": 0.0002200191383248197, + "loss": 0.4751, + "step": 782 + }, + { + "epoch": 1.51, + "learning_rate": 0.00021959001662374373, + "loss": 0.3936, + "step": 783 + }, + { + "epoch": 1.51, + "learning_rate": 0.00021916080384429184, + "loss": 0.4433, + "step": 784 + }, + { + "epoch": 1.51, + "learning_rate": 0.0002187315019819703, + "loss": 0.4883, + "step": 785 + }, + { + "epoch": 1.51, + "learning_rate": 0.00021830211303269965, + "loss": 0.4925, + "step": 786 + }, + { + "epoch": 1.52, + "learning_rate": 0.00021787263899280537, + "loss": 0.4597, + "step": 787 + }, + { + "epoch": 1.52, + "learning_rate": 0.00021744308185900848, + "loss": 0.4954, + "step": 788 + }, + { + "epoch": 1.52, + "learning_rate": 0.00021701344362841626, + "loss": 0.4025, + "step": 789 + }, + { + "epoch": 1.52, + "learning_rate": 0.00021658372629851318, + "loss": 0.5734, + "step": 790 + }, + { + "epoch": 1.52, + "learning_rate": 0.00021615393186715128, + "loss": 0.3779, + "step": 791 + }, + { + "epoch": 1.53, + "learning_rate": 0.00021572406233254116, + "loss": 0.4994, + "step": 792 + }, + { + "epoch": 1.53, + "learning_rate": 0.00021529411969324275, + "loss": 0.5359, + "step": 793 + }, + { + "epoch": 1.53, + "learning_rate": 0.00021486410594815554, + "loss": 0.4738, + "step": 794 + }, + { + "epoch": 1.53, + "learning_rate": 0.00021443402309650979, + "loss": 0.4915, + "step": 795 + }, + { + "epoch": 1.53, + "learning_rate": 0.00021400387313785704, + "loss": 0.4991, + "step": 796 + }, + { + "epoch": 1.54, + "learning_rate": 0.00021357365807206087, + "loss": 0.4503, + "step": 797 + }, + { + "epoch": 1.54, + "learning_rate": 0.0002131433798992874, + "loss": 0.4887, + "step": 798 + }, + { + "epoch": 1.54, + "learning_rate": 0.00021271304061999633, + "loss": 0.4279, + "step": 799 + }, + { + "epoch": 1.54, + "learning_rate": 0.00021228264223493139, + "loss": 0.5367, + "step": 800 + }, + { + "epoch": 1.54, + "learning_rate": 0.00021185218674511097, + "loss": 0.3212, + "step": 801 + }, + { + "epoch": 1.54, + "learning_rate": 0.00021142167615181915, + "loss": 0.493, + "step": 802 + }, + { + "epoch": 1.55, + "learning_rate": 0.0002109911124565962, + "loss": 0.3635, + "step": 803 + }, + { + "epoch": 1.55, + "learning_rate": 0.00021056049766122916, + "loss": 0.3494, + "step": 804 + }, + { + "epoch": 1.55, + "learning_rate": 0.00021012983376774254, + "loss": 0.3536, + "step": 805 + }, + { + "epoch": 1.55, + "learning_rate": 0.0002096991227783895, + "loss": 0.4335, + "step": 806 + }, + { + "epoch": 1.55, + "learning_rate": 0.00020926836669564168, + "loss": 0.4673, + "step": 807 + }, + { + "epoch": 1.56, + "learning_rate": 0.00020883756752218075, + "loss": 0.5269, + "step": 808 + }, + { + "epoch": 1.56, + "learning_rate": 0.0002084067272608886, + "loss": 0.4365, + "step": 809 + }, + { + "epoch": 1.56, + "learning_rate": 0.00020797584791483806, + "loss": 0.2998, + "step": 810 + }, + { + "epoch": 1.56, + "learning_rate": 0.00020754493148728375, + "loss": 0.4318, + "step": 811 + }, + { + "epoch": 1.56, + "learning_rate": 0.00020711397998165264, + "loss": 0.3205, + "step": 812 + }, + { + "epoch": 1.57, + "learning_rate": 0.00020668299540153493, + "loss": 0.5026, + "step": 813 + }, + { + "epoch": 1.57, + "learning_rate": 0.00020625197975067438, + "loss": 0.3923, + "step": 814 + }, + { + "epoch": 1.57, + "learning_rate": 0.0002058209350329594, + "loss": 0.3026, + "step": 815 + }, + { + "epoch": 1.57, + "learning_rate": 0.00020538986325241342, + "loss": 0.4729, + "step": 816 + }, + { + "epoch": 1.57, + "learning_rate": 0.00020495876641318567, + "loss": 0.3236, + "step": 817 + }, + { + "epoch": 1.58, + "learning_rate": 0.0002045276465195419, + "loss": 0.4934, + "step": 818 + }, + { + "epoch": 1.58, + "learning_rate": 0.00020409650557585523, + "loss": 0.3791, + "step": 819 + }, + { + "epoch": 1.58, + "learning_rate": 0.00020366534558659635, + "loss": 0.3879, + "step": 820 + }, + { + "epoch": 1.58, + "learning_rate": 0.00020323416855632477, + "loss": 0.3807, + "step": 821 + }, + { + "epoch": 1.58, + "learning_rate": 0.00020280297648967897, + "loss": 0.415, + "step": 822 + }, + { + "epoch": 1.59, + "learning_rate": 0.00020237177139136758, + "loss": 0.3824, + "step": 823 + }, + { + "epoch": 1.59, + "learning_rate": 0.0002019405552661596, + "loss": 0.3854, + "step": 824 + }, + { + "epoch": 1.59, + "learning_rate": 0.00020150933011887543, + "loss": 0.3781, + "step": 825 + }, + { + "epoch": 1.59, + "learning_rate": 0.00020107809795437745, + "loss": 0.3904, + "step": 826 + }, + { + "epoch": 1.59, + "learning_rate": 0.00020064686077756057, + "loss": 0.3213, + "step": 827 + }, + { + "epoch": 1.6, + "learning_rate": 0.00020021562059334302, + "loss": 0.4888, + "step": 828 + }, + { + "epoch": 1.6, + "learning_rate": 0.00019978437940665702, + "loss": 0.3375, + "step": 829 + }, + { + "epoch": 1.6, + "learning_rate": 0.0001993531392224394, + "loss": 0.4325, + "step": 830 + }, + { + "epoch": 1.6, + "learning_rate": 0.00019892190204562257, + "loss": 0.2865, + "step": 831 + }, + { + "epoch": 1.6, + "learning_rate": 0.0001984906698811246, + "loss": 0.3984, + "step": 832 + }, + { + "epoch": 1.6, + "learning_rate": 0.00019805944473384038, + "loss": 0.3011, + "step": 833 + }, + { + "epoch": 1.61, + "learning_rate": 0.00019762822860863247, + "loss": 0.4063, + "step": 834 + }, + { + "epoch": 1.61, + "learning_rate": 0.00019719702351032105, + "loss": 0.4489, + "step": 835 + }, + { + "epoch": 1.61, + "learning_rate": 0.00019676583144367525, + "loss": 0.3991, + "step": 836 + }, + { + "epoch": 1.61, + "learning_rate": 0.00019633465441340367, + "loss": 0.2736, + "step": 837 + }, + { + "epoch": 1.61, + "learning_rate": 0.00019590349442414484, + "loss": 0.3842, + "step": 838 + }, + { + "epoch": 1.62, + "learning_rate": 0.0001954723534804581, + "loss": 0.346, + "step": 839 + }, + { + "epoch": 1.62, + "learning_rate": 0.0001950412335868144, + "loss": 0.3435, + "step": 840 + }, + { + "epoch": 1.62, + "learning_rate": 0.00019461013674758668, + "loss": 0.4139, + "step": 841 + }, + { + "epoch": 1.62, + "learning_rate": 0.00019417906496704064, + "loss": 0.4031, + "step": 842 + }, + { + "epoch": 1.62, + "learning_rate": 0.00019374802024932567, + "loss": 0.352, + "step": 843 + }, + { + "epoch": 1.63, + "learning_rate": 0.00019331700459846517, + "loss": 0.332, + "step": 844 + }, + { + "epoch": 1.63, + "learning_rate": 0.00019288602001834735, + "loss": 0.3822, + "step": 845 + }, + { + "epoch": 1.63, + "learning_rate": 0.00019245506851271632, + "loss": 0.3912, + "step": 846 + }, + { + "epoch": 1.63, + "learning_rate": 0.000192024152085162, + "loss": 0.4028, + "step": 847 + }, + { + "epoch": 1.63, + "learning_rate": 0.00019159327273911145, + "loss": 0.3625, + "step": 848 + }, + { + "epoch": 1.64, + "learning_rate": 0.00019116243247781927, + "loss": 0.392, + "step": 849 + }, + { + "epoch": 1.64, + "learning_rate": 0.00019073163330435842, + "loss": 0.3687, + "step": 850 + }, + { + "epoch": 1.64, + "learning_rate": 0.00019030087722161055, + "loss": 0.3794, + "step": 851 + }, + { + "epoch": 1.64, + "learning_rate": 0.00018987016623225747, + "loss": 0.4095, + "step": 852 + }, + { + "epoch": 1.64, + "learning_rate": 0.00018943950233877094, + "loss": 0.3665, + "step": 853 + }, + { + "epoch": 1.65, + "learning_rate": 0.00018900888754340382, + "loss": 0.3826, + "step": 854 + }, + { + "epoch": 1.65, + "learning_rate": 0.0001885783238481809, + "loss": 0.4903, + "step": 855 + }, + { + "epoch": 1.65, + "learning_rate": 0.00018814781325488905, + "loss": 0.3174, + "step": 856 + }, + { + "epoch": 1.65, + "learning_rate": 0.00018771735776506866, + "loss": 0.3987, + "step": 857 + }, + { + "epoch": 1.65, + "learning_rate": 0.0001872869593800037, + "loss": 0.4488, + "step": 858 + }, + { + "epoch": 1.65, + "learning_rate": 0.00018685662010071261, + "loss": 0.3298, + "step": 859 + }, + { + "epoch": 1.66, + "learning_rate": 0.0001864263419279392, + "loss": 0.3385, + "step": 860 + }, + { + "epoch": 1.66, + "learning_rate": 0.00018599612686214304, + "loss": 0.4442, + "step": 861 + }, + { + "epoch": 1.66, + "learning_rate": 0.00018556597690349023, + "loss": 0.3132, + "step": 862 + }, + { + "epoch": 1.66, + "learning_rate": 0.0001851358940518445, + "loss": 0.5051, + "step": 863 + }, + { + "epoch": 1.66, + "learning_rate": 0.0001847058803067573, + "loss": 0.3919, + "step": 864 + }, + { + "epoch": 1.67, + "learning_rate": 0.00018427593766745884, + "loss": 0.4193, + "step": 865 + }, + { + "epoch": 1.67, + "learning_rate": 0.0001838460681328488, + "loss": 0.3196, + "step": 866 + }, + { + "epoch": 1.67, + "learning_rate": 0.00018341627370148692, + "loss": 0.6047, + "step": 867 + }, + { + "epoch": 1.67, + "learning_rate": 0.00018298655637158376, + "loss": 0.3741, + "step": 868 + }, + { + "epoch": 1.67, + "learning_rate": 0.00018255691814099157, + "loss": 0.4033, + "step": 869 + }, + { + "epoch": 1.68, + "learning_rate": 0.00018212736100719465, + "loss": 0.422, + "step": 870 + }, + { + "epoch": 1.68, + "learning_rate": 0.00018169788696730034, + "loss": 0.3358, + "step": 871 + }, + { + "epoch": 1.68, + "learning_rate": 0.00018126849801802976, + "loss": 0.3295, + "step": 872 + }, + { + "epoch": 1.68, + "learning_rate": 0.00018083919615570823, + "loss": 0.4859, + "step": 873 + }, + { + "epoch": 1.68, + "learning_rate": 0.00018040998337625627, + "loss": 0.2917, + "step": 874 + }, + { + "epoch": 1.69, + "learning_rate": 0.00017998086167518034, + "loss": 0.3505, + "step": 875 + }, + { + "epoch": 1.69, + "learning_rate": 0.00017955183304756332, + "loss": 0.5962, + "step": 876 + }, + { + "epoch": 1.69, + "learning_rate": 0.0001791228994880554, + "loss": 0.3097, + "step": 877 + }, + { + "epoch": 1.69, + "learning_rate": 0.00017869406299086482, + "loss": 0.4547, + "step": 878 + }, + { + "epoch": 1.69, + "learning_rate": 0.0001782653255497485, + "loss": 0.4567, + "step": 879 + }, + { + "epoch": 1.7, + "learning_rate": 0.0001778366891580028, + "loss": 0.3815, + "step": 880 + }, + { + "epoch": 1.7, + "learning_rate": 0.00017740815580845446, + "loss": 0.5061, + "step": 881 + }, + { + "epoch": 1.7, + "learning_rate": 0.00017697972749345094, + "loss": 0.3729, + "step": 882 + }, + { + "epoch": 1.7, + "learning_rate": 0.0001765514062048515, + "loss": 0.3626, + "step": 883 + }, + { + "epoch": 1.7, + "learning_rate": 0.00017612319393401772, + "loss": 0.4256, + "step": 884 + }, + { + "epoch": 1.7, + "learning_rate": 0.00017569509267180433, + "loss": 0.4114, + "step": 885 + }, + { + "epoch": 1.71, + "learning_rate": 0.00017526710440855017, + "loss": 0.3554, + "step": 886 + }, + { + "epoch": 1.71, + "learning_rate": 0.00017483923113406844, + "loss": 0.4741, + "step": 887 + }, + { + "epoch": 1.71, + "learning_rate": 0.00017441147483763785, + "loss": 0.4472, + "step": 888 + }, + { + "epoch": 1.71, + "learning_rate": 0.00017398383750799327, + "loss": 0.4325, + "step": 889 + }, + { + "epoch": 1.71, + "learning_rate": 0.00017355632113331648, + "loss": 0.4184, + "step": 890 + }, + { + "epoch": 1.72, + "learning_rate": 0.00017312892770122678, + "loss": 0.468, + "step": 891 + }, + { + "epoch": 1.72, + "learning_rate": 0.0001727016591987721, + "loss": 0.3996, + "step": 892 + }, + { + "epoch": 1.72, + "learning_rate": 0.00017227451761241938, + "loss": 0.4718, + "step": 893 + }, + { + "epoch": 1.72, + "learning_rate": 0.00017184750492804554, + "loss": 0.4076, + "step": 894 + }, + { + "epoch": 1.72, + "learning_rate": 0.00017142062313092824, + "loss": 0.337, + "step": 895 + }, + { + "epoch": 1.73, + "learning_rate": 0.00017099387420573656, + "loss": 0.3534, + "step": 896 + }, + { + "epoch": 1.73, + "learning_rate": 0.0001705672601365218, + "loss": 0.4757, + "step": 897 + }, + { + "epoch": 1.73, + "learning_rate": 0.00017014078290670848, + "loss": 0.4629, + "step": 898 + }, + { + "epoch": 1.73, + "learning_rate": 0.00016971444449908474, + "loss": 0.4999, + "step": 899 + }, + { + "epoch": 1.73, + "learning_rate": 0.0001692882468957932, + "loss": 0.4404, + "step": 900 + } + ], + "logging_steps": 1, + "max_steps": 1557, + "num_train_epochs": 3, + "save_steps": 50, + "total_flos": 1.2058110331294188e+18, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-900/training_args.bin b/checkpoint-900/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..c4843df7aa1383a371fb28dea27d303b1a1145e1 --- /dev/null +++ b/checkpoint-900/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:637e0437b5818f76ea2fea2aa5b87010fc39a85bdfc12277d436c72e69d11811 +size 4155 diff --git a/checkpoint-950/README.md b/checkpoint-950/README.md new file mode 100644 index 0000000000000000000000000000000000000000..b32efe7366f05d1d90816d2ad9e4b06ccca46bea --- /dev/null +++ b/checkpoint-950/README.md @@ -0,0 +1,219 @@ +--- +library_name: peft +base_model: mistralai/Mistral-7B-v0.1 +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] + + +## Training procedure + + +The following `bitsandbytes` quantization config was used during training: +- quant_method: bitsandbytes +- load_in_8bit: False +- load_in_4bit: True +- llm_int8_threshold: 6.0 +- llm_int8_skip_modules: None +- llm_int8_enable_fp32_cpu_offload: False +- llm_int8_has_fp16_weight: False +- bnb_4bit_quant_type: nf4 +- bnb_4bit_use_double_quant: True +- bnb_4bit_compute_dtype: float16 + +### Framework versions + + +- PEFT 0.6.0.dev0 diff --git a/checkpoint-950/adapter_config.json b/checkpoint-950/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..4e108f2da037ef6250457c67a4bedd308d97303c --- /dev/null +++ b/checkpoint-950/adapter_config.json @@ -0,0 +1,24 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "mistralai/Mistral-7B-v0.1", + "bias": "none", + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layers_pattern": null, + "layers_to_transform": null, + "lora_alpha": 16, + "lora_dropout": 0.05, + "modules_to_save": null, + "peft_type": "LORA", + "r": 16, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "gate_proj", + "down_proj", + "up_proj" + ], + "task_type": "CAUSAL_LM" +} \ No newline at end of file diff --git a/checkpoint-950/adapter_model.bin b/checkpoint-950/adapter_model.bin new file mode 100644 index 0000000000000000000000000000000000000000..5de3f3e6c9e61b9bd3e08f609bbd9ec5373788b9 --- /dev/null +++ b/checkpoint-950/adapter_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:beeedc987a15ea2421995f369107f9b533b5d424088562667fdc59e9d36e27f2 +size 113314765 diff --git a/checkpoint-950/optimizer.pt b/checkpoint-950/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..b9a2adbd936e4e5164b8b782b00f4dd32ac8ac4b --- /dev/null +++ b/checkpoint-950/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2f291998fa6fdd299a4b6a2daf74ba021219c7af0a4c81a473fecf5f70351cc8 +size 226653957 diff --git a/checkpoint-950/rng_state.pth b/checkpoint-950/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..5d897fb7deed86b4ea7454db56275119d91fad93 --- /dev/null +++ b/checkpoint-950/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7403abf508f51b347013a05e6d43ff6eebbe64a861e58d2f63602d8fb86d8c62 +size 14575 diff --git a/checkpoint-950/scheduler.pt b/checkpoint-950/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..bedc35738530bf6c63f435291f87d9c1e18bdd7e --- /dev/null +++ b/checkpoint-950/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f0eb2a89b97372834b870845c6014b3a93146e0ed69632269d8d4bad9b0c7744 +size 627 diff --git a/checkpoint-950/trainer_state.json b/checkpoint-950/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..17b02232b2fa8afa8fc7e49e39ea063dc98db121 --- /dev/null +++ b/checkpoint-950/trainer_state.json @@ -0,0 +1,5719 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 1.8299434348297028, + "eval_steps": 500, + "global_step": 950, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0, + "learning_rate": 4.000000000000001e-06, + "loss": 0.6869, + "step": 1 + }, + { + "epoch": 0.0, + "learning_rate": 8.000000000000001e-06, + "loss": 0.8396, + "step": 2 + }, + { + "epoch": 0.01, + "learning_rate": 1.2e-05, + "loss": 0.7489, + "step": 3 + }, + { + "epoch": 0.01, + "learning_rate": 1.6000000000000003e-05, + "loss": 0.7252, + "step": 4 + }, + { + "epoch": 0.01, + "learning_rate": 2e-05, + "loss": 0.6548, + "step": 5 + }, + { + "epoch": 0.01, + "learning_rate": 2.4e-05, + "loss": 0.8022, + "step": 6 + }, + { + "epoch": 0.01, + "learning_rate": 2.8000000000000003e-05, + "loss": 0.6524, + "step": 7 + }, + { + "epoch": 0.02, + "learning_rate": 3.2000000000000005e-05, + "loss": 0.6981, + "step": 8 + }, + { + "epoch": 0.02, + "learning_rate": 3.6e-05, + "loss": 0.7488, + "step": 9 + }, + { + "epoch": 0.02, + "learning_rate": 4e-05, + "loss": 0.6368, + "step": 10 + }, + { + "epoch": 0.02, + "learning_rate": 4.4000000000000006e-05, + "loss": 0.6891, + "step": 11 + }, + { + "epoch": 0.02, + "learning_rate": 4.8e-05, + "loss": 0.7968, + "step": 12 + }, + { + "epoch": 0.03, + "learning_rate": 5.2000000000000004e-05, + "loss": 0.6912, + "step": 13 + }, + { + "epoch": 0.03, + "learning_rate": 5.6000000000000006e-05, + "loss": 0.8452, + "step": 14 + }, + { + "epoch": 0.03, + "learning_rate": 6e-05, + "loss": 0.6989, + "step": 15 + }, + { + "epoch": 0.03, + "learning_rate": 6.400000000000001e-05, + "loss": 0.6685, + "step": 16 + }, + { + "epoch": 0.03, + "learning_rate": 6.800000000000001e-05, + "loss": 0.5469, + "step": 17 + }, + { + "epoch": 0.03, + "learning_rate": 7.2e-05, + "loss": 0.7915, + "step": 18 + }, + { + "epoch": 0.04, + "learning_rate": 7.6e-05, + "loss": 0.7744, + "step": 19 + }, + { + "epoch": 0.04, + "learning_rate": 8e-05, + "loss": 0.6804, + "step": 20 + }, + { + "epoch": 0.04, + "learning_rate": 8.4e-05, + "loss": 0.7796, + "step": 21 + }, + { + "epoch": 0.04, + "learning_rate": 8.800000000000001e-05, + "loss": 0.706, + "step": 22 + }, + { + "epoch": 0.04, + "learning_rate": 9.200000000000001e-05, + "loss": 0.6798, + "step": 23 + }, + { + "epoch": 0.05, + "learning_rate": 9.6e-05, + "loss": 0.6333, + "step": 24 + }, + { + "epoch": 0.05, + "learning_rate": 0.0001, + "loss": 0.6012, + "step": 25 + }, + { + "epoch": 0.05, + "learning_rate": 0.00010400000000000001, + "loss": 0.52, + "step": 26 + }, + { + "epoch": 0.05, + "learning_rate": 0.00010800000000000001, + "loss": 0.6583, + "step": 27 + }, + { + "epoch": 0.05, + "learning_rate": 0.00011200000000000001, + "loss": 0.7354, + "step": 28 + }, + { + "epoch": 0.06, + "learning_rate": 0.000116, + "loss": 0.6296, + "step": 29 + }, + { + "epoch": 0.06, + "learning_rate": 0.00012, + "loss": 0.6352, + "step": 30 + }, + { + "epoch": 0.06, + "learning_rate": 0.000124, + "loss": 0.6007, + "step": 31 + }, + { + "epoch": 0.06, + "learning_rate": 0.00012800000000000002, + "loss": 0.5659, + "step": 32 + }, + { + "epoch": 0.06, + "learning_rate": 0.000132, + "loss": 0.5138, + "step": 33 + }, + { + "epoch": 0.07, + "learning_rate": 0.00013600000000000003, + "loss": 0.6639, + "step": 34 + }, + { + "epoch": 0.07, + "learning_rate": 0.00014, + "loss": 0.5934, + "step": 35 + }, + { + "epoch": 0.07, + "learning_rate": 0.000144, + "loss": 0.5233, + "step": 36 + }, + { + "epoch": 0.07, + "learning_rate": 0.000148, + "loss": 0.5307, + "step": 37 + }, + { + "epoch": 0.07, + "learning_rate": 0.000152, + "loss": 0.5928, + "step": 38 + }, + { + "epoch": 0.08, + "learning_rate": 0.00015600000000000002, + "loss": 0.5908, + "step": 39 + }, + { + "epoch": 0.08, + "learning_rate": 0.00016, + "loss": 0.6366, + "step": 40 + }, + { + "epoch": 0.08, + "learning_rate": 0.000164, + "loss": 0.5972, + "step": 41 + }, + { + "epoch": 0.08, + "learning_rate": 0.000168, + "loss": 0.4825, + "step": 42 + }, + { + "epoch": 0.08, + "learning_rate": 0.000172, + "loss": 0.6783, + "step": 43 + }, + { + "epoch": 0.08, + "learning_rate": 0.00017600000000000002, + "loss": 0.6082, + "step": 44 + }, + { + "epoch": 0.09, + "learning_rate": 0.00018, + "loss": 0.7633, + "step": 45 + }, + { + "epoch": 0.09, + "learning_rate": 0.00018400000000000003, + "loss": 0.5988, + "step": 46 + }, + { + "epoch": 0.09, + "learning_rate": 0.000188, + "loss": 0.6658, + "step": 47 + }, + { + "epoch": 0.09, + "learning_rate": 0.000192, + "loss": 0.5945, + "step": 48 + }, + { + "epoch": 0.09, + "learning_rate": 0.000196, + "loss": 0.5984, + "step": 49 + }, + { + "epoch": 0.1, + "learning_rate": 0.0002, + "loss": 0.6778, + "step": 50 + }, + { + "epoch": 0.1, + "learning_rate": 0.00020400000000000003, + "loss": 0.6057, + "step": 51 + }, + { + "epoch": 0.1, + "learning_rate": 0.00020800000000000001, + "loss": 0.601, + "step": 52 + }, + { + "epoch": 0.1, + "learning_rate": 0.00021200000000000003, + "loss": 0.5566, + "step": 53 + }, + { + "epoch": 0.1, + "learning_rate": 0.00021600000000000002, + "loss": 0.5911, + "step": 54 + }, + { + "epoch": 0.11, + "learning_rate": 0.00022000000000000003, + "loss": 0.7636, + "step": 55 + }, + { + "epoch": 0.11, + "learning_rate": 0.00022400000000000002, + "loss": 0.5537, + "step": 56 + }, + { + "epoch": 0.11, + "learning_rate": 0.00022799999999999999, + "loss": 0.6037, + "step": 57 + }, + { + "epoch": 0.11, + "learning_rate": 0.000232, + "loss": 0.6474, + "step": 58 + }, + { + "epoch": 0.11, + "learning_rate": 0.000236, + "loss": 0.6483, + "step": 59 + }, + { + "epoch": 0.12, + "learning_rate": 0.00024, + "loss": 0.5021, + "step": 60 + }, + { + "epoch": 0.12, + "learning_rate": 0.000244, + "loss": 0.5347, + "step": 61 + }, + { + "epoch": 0.12, + "learning_rate": 0.000248, + "loss": 0.5791, + "step": 62 + }, + { + "epoch": 0.12, + "learning_rate": 0.000252, + "loss": 0.5407, + "step": 63 + }, + { + "epoch": 0.12, + "learning_rate": 0.00025600000000000004, + "loss": 0.5298, + "step": 64 + }, + { + "epoch": 0.13, + "learning_rate": 0.00026000000000000003, + "loss": 0.5685, + "step": 65 + }, + { + "epoch": 0.13, + "learning_rate": 0.000264, + "loss": 0.5108, + "step": 66 + }, + { + "epoch": 0.13, + "learning_rate": 0.000268, + "loss": 0.526, + "step": 67 + }, + { + "epoch": 0.13, + "learning_rate": 0.00027200000000000005, + "loss": 0.6843, + "step": 68 + }, + { + "epoch": 0.13, + "learning_rate": 0.000276, + "loss": 0.6608, + "step": 69 + }, + { + "epoch": 0.13, + "learning_rate": 0.00028, + "loss": 0.5866, + "step": 70 + }, + { + "epoch": 0.14, + "learning_rate": 0.000284, + "loss": 0.6422, + "step": 71 + }, + { + "epoch": 0.14, + "learning_rate": 0.000288, + "loss": 0.449, + "step": 72 + }, + { + "epoch": 0.14, + "learning_rate": 0.000292, + "loss": 0.5319, + "step": 73 + }, + { + "epoch": 0.14, + "learning_rate": 0.000296, + "loss": 0.5977, + "step": 74 + }, + { + "epoch": 0.14, + "learning_rate": 0.00030000000000000003, + "loss": 0.5805, + "step": 75 + }, + { + "epoch": 0.15, + "learning_rate": 0.000304, + "loss": 0.5209, + "step": 76 + }, + { + "epoch": 0.15, + "learning_rate": 0.000308, + "loss": 0.6098, + "step": 77 + }, + { + "epoch": 0.15, + "learning_rate": 0.00031200000000000005, + "loss": 0.4665, + "step": 78 + }, + { + "epoch": 0.15, + "learning_rate": 0.00031600000000000004, + "loss": 0.6882, + "step": 79 + }, + { + "epoch": 0.15, + "learning_rate": 0.00032, + "loss": 0.5427, + "step": 80 + }, + { + "epoch": 0.16, + "learning_rate": 0.000324, + "loss": 0.5345, + "step": 81 + }, + { + "epoch": 0.16, + "learning_rate": 0.000328, + "loss": 0.663, + "step": 82 + }, + { + "epoch": 0.16, + "learning_rate": 0.000332, + "loss": 0.5393, + "step": 83 + }, + { + "epoch": 0.16, + "learning_rate": 0.000336, + "loss": 0.5711, + "step": 84 + }, + { + "epoch": 0.16, + "learning_rate": 0.00034, + "loss": 0.5261, + "step": 85 + }, + { + "epoch": 0.17, + "learning_rate": 0.000344, + "loss": 0.5775, + "step": 86 + }, + { + "epoch": 0.17, + "learning_rate": 0.000348, + "loss": 0.6329, + "step": 87 + }, + { + "epoch": 0.17, + "learning_rate": 0.00035200000000000005, + "loss": 0.4425, + "step": 88 + }, + { + "epoch": 0.17, + "learning_rate": 0.00035600000000000003, + "loss": 0.6837, + "step": 89 + }, + { + "epoch": 0.17, + "learning_rate": 0.00036, + "loss": 0.615, + "step": 90 + }, + { + "epoch": 0.18, + "learning_rate": 0.000364, + "loss": 0.5615, + "step": 91 + }, + { + "epoch": 0.18, + "learning_rate": 0.00036800000000000005, + "loss": 0.5434, + "step": 92 + }, + { + "epoch": 0.18, + "learning_rate": 0.00037200000000000004, + "loss": 0.5864, + "step": 93 + }, + { + "epoch": 0.18, + "learning_rate": 0.000376, + "loss": 0.5583, + "step": 94 + }, + { + "epoch": 0.18, + "learning_rate": 0.00038, + "loss": 0.5299, + "step": 95 + }, + { + "epoch": 0.18, + "learning_rate": 0.000384, + "loss": 0.532, + "step": 96 + }, + { + "epoch": 0.19, + "learning_rate": 0.000388, + "loss": 0.5227, + "step": 97 + }, + { + "epoch": 0.19, + "learning_rate": 0.000392, + "loss": 0.5275, + "step": 98 + }, + { + "epoch": 0.19, + "learning_rate": 0.00039600000000000003, + "loss": 0.4541, + "step": 99 + }, + { + "epoch": 0.19, + "learning_rate": 0.0004, + "loss": 0.6485, + "step": 100 + }, + { + "epoch": 0.19, + "learning_rate": 0.0003999995350775973, + "loss": 0.5438, + "step": 101 + }, + { + "epoch": 0.2, + "learning_rate": 0.00039999814031255063, + "loss": 0.5997, + "step": 102 + }, + { + "epoch": 0.2, + "learning_rate": 0.00039999581571134455, + "loss": 0.5322, + "step": 103 + }, + { + "epoch": 0.2, + "learning_rate": 0.0003999925612847867, + "loss": 0.484, + "step": 104 + }, + { + "epoch": 0.2, + "learning_rate": 0.00039998837704800766, + "loss": 0.5961, + "step": 105 + }, + { + "epoch": 0.2, + "learning_rate": 0.00039998326302046085, + "loss": 0.7405, + "step": 106 + }, + { + "epoch": 0.21, + "learning_rate": 0.00039997721922592255, + "loss": 0.5802, + "step": 107 + }, + { + "epoch": 0.21, + "learning_rate": 0.00039997024569249167, + "loss": 0.769, + "step": 108 + }, + { + "epoch": 0.21, + "learning_rate": 0.0003999623424525898, + "loss": 0.5598, + "step": 109 + }, + { + "epoch": 0.21, + "learning_rate": 0.0003999535095429608, + "loss": 0.6143, + "step": 110 + }, + { + "epoch": 0.21, + "learning_rate": 0.00039994374700467095, + "loss": 0.5766, + "step": 111 + }, + { + "epoch": 0.22, + "learning_rate": 0.00039993305488310836, + "loss": 0.7695, + "step": 112 + }, + { + "epoch": 0.22, + "learning_rate": 0.0003999214332279831, + "loss": 0.7153, + "step": 113 + }, + { + "epoch": 0.22, + "learning_rate": 0.0003999088820933269, + "loss": 0.5835, + "step": 114 + }, + { + "epoch": 0.22, + "learning_rate": 0.00039989540153749286, + "loss": 0.6634, + "step": 115 + }, + { + "epoch": 0.22, + "learning_rate": 0.000399880991623155, + "loss": 0.6069, + "step": 116 + }, + { + "epoch": 0.23, + "learning_rate": 0.0003998656524173082, + "loss": 0.7224, + "step": 117 + }, + { + "epoch": 0.23, + "learning_rate": 0.000399849383991268, + "loss": 0.5884, + "step": 118 + }, + { + "epoch": 0.23, + "learning_rate": 0.0003998321864206699, + "loss": 0.5122, + "step": 119 + }, + { + "epoch": 0.23, + "learning_rate": 0.00039981405978546924, + "loss": 0.6453, + "step": 120 + }, + { + "epoch": 0.23, + "learning_rate": 0.0003997950041699408, + "loss": 0.4665, + "step": 121 + }, + { + "epoch": 0.23, + "learning_rate": 0.0003997750196626785, + "loss": 0.5428, + "step": 122 + }, + { + "epoch": 0.24, + "learning_rate": 0.00039975410635659464, + "loss": 0.4365, + "step": 123 + }, + { + "epoch": 0.24, + "learning_rate": 0.00039973226434891995, + "loss": 0.5978, + "step": 124 + }, + { + "epoch": 0.24, + "learning_rate": 0.00039970949374120286, + "loss": 0.7729, + "step": 125 + }, + { + "epoch": 0.24, + "learning_rate": 0.000399685794639309, + "loss": 0.6212, + "step": 126 + }, + { + "epoch": 0.24, + "learning_rate": 0.00039966116715342066, + "loss": 0.5426, + "step": 127 + }, + { + "epoch": 0.25, + "learning_rate": 0.00039963561139803676, + "loss": 0.5782, + "step": 128 + }, + { + "epoch": 0.25, + "learning_rate": 0.0003996091274919716, + "loss": 0.6701, + "step": 129 + }, + { + "epoch": 0.25, + "learning_rate": 0.0003995817155583548, + "loss": 0.6314, + "step": 130 + }, + { + "epoch": 0.25, + "learning_rate": 0.0003995533757246307, + "loss": 0.6662, + "step": 131 + }, + { + "epoch": 0.25, + "learning_rate": 0.0003995241081225573, + "loss": 0.5192, + "step": 132 + }, + { + "epoch": 0.26, + "learning_rate": 0.0003994939128882065, + "loss": 0.5591, + "step": 133 + }, + { + "epoch": 0.26, + "learning_rate": 0.0003994627901619625, + "loss": 0.5809, + "step": 134 + }, + { + "epoch": 0.26, + "learning_rate": 0.0003994307400885219, + "loss": 0.4871, + "step": 135 + }, + { + "epoch": 0.26, + "learning_rate": 0.0003993977628168928, + "loss": 0.6666, + "step": 136 + }, + { + "epoch": 0.26, + "learning_rate": 0.0003993638585003938, + "loss": 0.6469, + "step": 137 + }, + { + "epoch": 0.27, + "learning_rate": 0.00039932902729665357, + "loss": 0.5727, + "step": 138 + }, + { + "epoch": 0.27, + "learning_rate": 0.00039929326936761036, + "loss": 0.6715, + "step": 139 + }, + { + "epoch": 0.27, + "learning_rate": 0.00039925658487951067, + "loss": 0.5686, + "step": 140 + }, + { + "epoch": 0.27, + "learning_rate": 0.00039921897400290894, + "loss": 0.501, + "step": 141 + }, + { + "epoch": 0.27, + "learning_rate": 0.00039918043691266665, + "loss": 0.5795, + "step": 142 + }, + { + "epoch": 0.28, + "learning_rate": 0.00039914097378795124, + "loss": 0.6287, + "step": 143 + }, + { + "epoch": 0.28, + "learning_rate": 0.00039910058481223564, + "loss": 0.7016, + "step": 144 + }, + { + "epoch": 0.28, + "learning_rate": 0.00039905927017329726, + "loss": 0.6232, + "step": 145 + }, + { + "epoch": 0.28, + "learning_rate": 0.00039901703006321715, + "loss": 0.5291, + "step": 146 + }, + { + "epoch": 0.28, + "learning_rate": 0.00039897386467837903, + "loss": 0.5297, + "step": 147 + }, + { + "epoch": 0.28, + "learning_rate": 0.00039892977421946844, + "loss": 0.5784, + "step": 148 + }, + { + "epoch": 0.29, + "learning_rate": 0.0003988847588914718, + "loss": 0.5714, + "step": 149 + }, + { + "epoch": 0.29, + "learning_rate": 0.0003988388189036754, + "loss": 0.5044, + "step": 150 + }, + { + "epoch": 0.29, + "learning_rate": 0.0003987919544696646, + "loss": 0.8246, + "step": 151 + }, + { + "epoch": 0.29, + "learning_rate": 0.0003987441658073226, + "loss": 0.5048, + "step": 152 + }, + { + "epoch": 0.29, + "learning_rate": 0.0003986954531388297, + "loss": 0.5433, + "step": 153 + }, + { + "epoch": 0.3, + "learning_rate": 0.00039864581669066186, + "loss": 0.5251, + "step": 154 + }, + { + "epoch": 0.3, + "learning_rate": 0.0003985952566935902, + "loss": 0.5708, + "step": 155 + }, + { + "epoch": 0.3, + "learning_rate": 0.00039854377338267936, + "loss": 0.6276, + "step": 156 + }, + { + "epoch": 0.3, + "learning_rate": 0.00039849136699728684, + "loss": 0.4915, + "step": 157 + }, + { + "epoch": 0.3, + "learning_rate": 0.0003984380377810617, + "loss": 0.6389, + "step": 158 + }, + { + "epoch": 0.31, + "learning_rate": 0.00039838378598194325, + "loss": 0.6067, + "step": 159 + }, + { + "epoch": 0.31, + "learning_rate": 0.00039832861185216045, + "loss": 0.6136, + "step": 160 + }, + { + "epoch": 0.31, + "learning_rate": 0.0003982725156482301, + "loss": 0.5597, + "step": 161 + }, + { + "epoch": 0.31, + "learning_rate": 0.000398215497630956, + "loss": 0.5957, + "step": 162 + }, + { + "epoch": 0.31, + "learning_rate": 0.0003981575580654278, + "loss": 0.5853, + "step": 163 + }, + { + "epoch": 0.32, + "learning_rate": 0.0003980986972210194, + "loss": 0.5462, + "step": 164 + }, + { + "epoch": 0.32, + "learning_rate": 0.0003980389153713881, + "loss": 0.5302, + "step": 165 + }, + { + "epoch": 0.32, + "learning_rate": 0.00039797821279447307, + "loss": 0.5395, + "step": 166 + }, + { + "epoch": 0.32, + "learning_rate": 0.00039791658977249425, + "loss": 0.7004, + "step": 167 + }, + { + "epoch": 0.32, + "learning_rate": 0.00039785404659195084, + "loss": 0.5622, + "step": 168 + }, + { + "epoch": 0.33, + "learning_rate": 0.00039779058354362013, + "loss": 0.5759, + "step": 169 + }, + { + "epoch": 0.33, + "learning_rate": 0.000397726200922556, + "loss": 0.6184, + "step": 170 + }, + { + "epoch": 0.33, + "learning_rate": 0.0003976608990280877, + "loss": 0.5488, + "step": 171 + }, + { + "epoch": 0.33, + "learning_rate": 0.0003975946781638183, + "loss": 0.6162, + "step": 172 + }, + { + "epoch": 0.33, + "learning_rate": 0.0003975275386376236, + "loss": 0.558, + "step": 173 + }, + { + "epoch": 0.34, + "learning_rate": 0.0003974594807616502, + "loss": 0.519, + "step": 174 + }, + { + "epoch": 0.34, + "learning_rate": 0.0003973905048523144, + "loss": 0.6195, + "step": 175 + }, + { + "epoch": 0.34, + "learning_rate": 0.00039732061123030064, + "loss": 0.5991, + "step": 176 + }, + { + "epoch": 0.34, + "learning_rate": 0.0003972498002205601, + "loss": 0.5428, + "step": 177 + }, + { + "epoch": 0.34, + "learning_rate": 0.00039717807215230896, + "loss": 0.5323, + "step": 178 + }, + { + "epoch": 0.34, + "learning_rate": 0.00039710542735902705, + "loss": 0.5307, + "step": 179 + }, + { + "epoch": 0.35, + "learning_rate": 0.0003970318661784564, + "loss": 0.5783, + "step": 180 + }, + { + "epoch": 0.35, + "learning_rate": 0.0003969573889525993, + "loss": 0.5924, + "step": 181 + }, + { + "epoch": 0.35, + "learning_rate": 0.00039688199602771714, + "loss": 0.5902, + "step": 182 + }, + { + "epoch": 0.35, + "learning_rate": 0.00039680568775432855, + "loss": 0.6291, + "step": 183 + }, + { + "epoch": 0.35, + "learning_rate": 0.0003967284644872077, + "loss": 0.5942, + "step": 184 + }, + { + "epoch": 0.36, + "learning_rate": 0.0003966503265853829, + "loss": 0.4878, + "step": 185 + }, + { + "epoch": 0.36, + "learning_rate": 0.0003965712744121347, + "loss": 0.6487, + "step": 186 + }, + { + "epoch": 0.36, + "learning_rate": 0.0003964913083349945, + "loss": 0.6111, + "step": 187 + }, + { + "epoch": 0.36, + "learning_rate": 0.00039641042872574233, + "loss": 0.6072, + "step": 188 + }, + { + "epoch": 0.36, + "learning_rate": 0.00039632863596040575, + "loss": 0.716, + "step": 189 + }, + { + "epoch": 0.37, + "learning_rate": 0.00039624593041925763, + "loss": 0.6178, + "step": 190 + }, + { + "epoch": 0.37, + "learning_rate": 0.0003961623124868145, + "loss": 0.6323, + "step": 191 + }, + { + "epoch": 0.37, + "learning_rate": 0.00039607778255183485, + "loss": 0.5821, + "step": 192 + }, + { + "epoch": 0.37, + "learning_rate": 0.0003959923410073174, + "loss": 0.6738, + "step": 193 + }, + { + "epoch": 0.37, + "learning_rate": 0.0003959059882504989, + "loss": 0.6203, + "step": 194 + }, + { + "epoch": 0.38, + "learning_rate": 0.00039581872468285277, + "loss": 0.632, + "step": 195 + }, + { + "epoch": 0.38, + "learning_rate": 0.0003957305507100868, + "loss": 0.5857, + "step": 196 + }, + { + "epoch": 0.38, + "learning_rate": 0.00039564146674214164, + "loss": 0.6311, + "step": 197 + }, + { + "epoch": 0.38, + "learning_rate": 0.0003955514731931885, + "loss": 0.5889, + "step": 198 + }, + { + "epoch": 0.38, + "learning_rate": 0.00039546057048162763, + "loss": 0.5201, + "step": 199 + }, + { + "epoch": 0.39, + "learning_rate": 0.00039536875903008607, + "loss": 0.5581, + "step": 200 + }, + { + "epoch": 0.39, + "learning_rate": 0.00039527603926541586, + "loss": 0.5104, + "step": 201 + }, + { + "epoch": 0.39, + "learning_rate": 0.00039518241161869193, + "loss": 0.5978, + "step": 202 + }, + { + "epoch": 0.39, + "learning_rate": 0.00039508787652521013, + "loss": 0.6244, + "step": 203 + }, + { + "epoch": 0.39, + "learning_rate": 0.00039499243442448536, + "loss": 0.589, + "step": 204 + }, + { + "epoch": 0.39, + "learning_rate": 0.0003948960857602493, + "loss": 0.575, + "step": 205 + }, + { + "epoch": 0.4, + "learning_rate": 0.0003947988309804485, + "loss": 0.5494, + "step": 206 + }, + { + "epoch": 0.4, + "learning_rate": 0.0003947006705372422, + "loss": 0.4895, + "step": 207 + }, + { + "epoch": 0.4, + "learning_rate": 0.00039460160488700036, + "loss": 0.5479, + "step": 208 + }, + { + "epoch": 0.4, + "learning_rate": 0.00039450163449030124, + "loss": 0.5893, + "step": 209 + }, + { + "epoch": 0.4, + "learning_rate": 0.0003944007598119297, + "loss": 0.5451, + "step": 210 + }, + { + "epoch": 0.41, + "learning_rate": 0.0003942989813208747, + "loss": 0.5582, + "step": 211 + }, + { + "epoch": 0.41, + "learning_rate": 0.0003941962994903273, + "loss": 0.5121, + "step": 212 + }, + { + "epoch": 0.41, + "learning_rate": 0.00039409271479767826, + "loss": 0.6324, + "step": 213 + }, + { + "epoch": 0.41, + "learning_rate": 0.000393988227724516, + "loss": 0.6118, + "step": 214 + }, + { + "epoch": 0.41, + "learning_rate": 0.0003938828387566244, + "loss": 0.6303, + "step": 215 + }, + { + "epoch": 0.42, + "learning_rate": 0.0003937765483839804, + "loss": 0.7705, + "step": 216 + }, + { + "epoch": 0.42, + "learning_rate": 0.0003936693571007517, + "loss": 0.6224, + "step": 217 + }, + { + "epoch": 0.42, + "learning_rate": 0.0003935612654052946, + "loss": 0.5664, + "step": 218 + }, + { + "epoch": 0.42, + "learning_rate": 0.00039345227380015163, + "loss": 0.66, + "step": 219 + }, + { + "epoch": 0.42, + "learning_rate": 0.00039334238279204906, + "loss": 0.5582, + "step": 220 + }, + { + "epoch": 0.43, + "learning_rate": 0.00039323159289189505, + "loss": 0.6087, + "step": 221 + }, + { + "epoch": 0.43, + "learning_rate": 0.0003931199046147764, + "loss": 0.5566, + "step": 222 + }, + { + "epoch": 0.43, + "learning_rate": 0.00039300731847995716, + "loss": 0.5775, + "step": 223 + }, + { + "epoch": 0.43, + "learning_rate": 0.00039289383501087534, + "loss": 0.5081, + "step": 224 + }, + { + "epoch": 0.43, + "learning_rate": 0.00039277945473514104, + "loss": 0.5218, + "step": 225 + }, + { + "epoch": 0.44, + "learning_rate": 0.0003926641781845338, + "loss": 0.6655, + "step": 226 + }, + { + "epoch": 0.44, + "learning_rate": 0.0003925480058950002, + "loss": 0.5735, + "step": 227 + }, + { + "epoch": 0.44, + "learning_rate": 0.00039243093840665114, + "loss": 0.6609, + "step": 228 + }, + { + "epoch": 0.44, + "learning_rate": 0.0003923129762637596, + "loss": 0.7323, + "step": 229 + }, + { + "epoch": 0.44, + "learning_rate": 0.000392194120014758, + "loss": 0.5703, + "step": 230 + }, + { + "epoch": 0.44, + "learning_rate": 0.00039207437021223583, + "loss": 0.6545, + "step": 231 + }, + { + "epoch": 0.45, + "learning_rate": 0.0003919537274129366, + "loss": 0.521, + "step": 232 + }, + { + "epoch": 0.45, + "learning_rate": 0.00039183219217775564, + "loss": 0.5257, + "step": 233 + }, + { + "epoch": 0.45, + "learning_rate": 0.0003917097650717377, + "loss": 0.5487, + "step": 234 + }, + { + "epoch": 0.45, + "learning_rate": 0.00039158644666407365, + "loss": 0.4861, + "step": 235 + }, + { + "epoch": 0.45, + "learning_rate": 0.00039146223752809845, + "loss": 0.4928, + "step": 236 + }, + { + "epoch": 0.46, + "learning_rate": 0.0003913371382412883, + "loss": 0.5253, + "step": 237 + }, + { + "epoch": 0.46, + "learning_rate": 0.00039121114938525756, + "loss": 0.6155, + "step": 238 + }, + { + "epoch": 0.46, + "learning_rate": 0.00039108427154575684, + "loss": 0.55, + "step": 239 + }, + { + "epoch": 0.46, + "learning_rate": 0.00039095650531266967, + "loss": 0.6617, + "step": 240 + }, + { + "epoch": 0.46, + "learning_rate": 0.00039082785128000976, + "loss": 0.5198, + "step": 241 + }, + { + "epoch": 0.47, + "learning_rate": 0.00039069831004591866, + "loss": 0.5302, + "step": 242 + }, + { + "epoch": 0.47, + "learning_rate": 0.0003905678822126625, + "loss": 0.5347, + "step": 243 + }, + { + "epoch": 0.47, + "learning_rate": 0.00039043656838662946, + "loss": 0.531, + "step": 244 + }, + { + "epoch": 0.47, + "learning_rate": 0.00039030436917832697, + "loss": 0.4884, + "step": 245 + }, + { + "epoch": 0.47, + "learning_rate": 0.00039017128520237883, + "loss": 0.6027, + "step": 246 + }, + { + "epoch": 0.48, + "learning_rate": 0.0003900373170775222, + "loss": 0.5537, + "step": 247 + }, + { + "epoch": 0.48, + "learning_rate": 0.00038990246542660494, + "loss": 0.5753, + "step": 248 + }, + { + "epoch": 0.48, + "learning_rate": 0.00038976673087658256, + "loss": 0.5059, + "step": 249 + }, + { + "epoch": 0.48, + "learning_rate": 0.00038963011405851537, + "loss": 0.5118, + "step": 250 + }, + { + "epoch": 0.48, + "learning_rate": 0.00038949261560756565, + "loss": 0.5645, + "step": 251 + }, + { + "epoch": 0.49, + "learning_rate": 0.0003893542361629944, + "loss": 0.5623, + "step": 252 + }, + { + "epoch": 0.49, + "learning_rate": 0.00038921497636815866, + "loss": 0.5216, + "step": 253 + }, + { + "epoch": 0.49, + "learning_rate": 0.0003890748368705085, + "loss": 0.4501, + "step": 254 + }, + { + "epoch": 0.49, + "learning_rate": 0.0003889338183215838, + "loss": 0.48, + "step": 255 + }, + { + "epoch": 0.49, + "learning_rate": 0.00038879192137701135, + "loss": 0.5218, + "step": 256 + }, + { + "epoch": 0.49, + "learning_rate": 0.0003886491466965018, + "loss": 0.5858, + "step": 257 + }, + { + "epoch": 0.5, + "learning_rate": 0.00038850549494384685, + "loss": 0.6124, + "step": 258 + }, + { + "epoch": 0.5, + "learning_rate": 0.00038836096678691536, + "loss": 0.4645, + "step": 259 + }, + { + "epoch": 0.5, + "learning_rate": 0.00038821556289765136, + "loss": 0.474, + "step": 260 + }, + { + "epoch": 0.5, + "learning_rate": 0.00038806928395207003, + "loss": 0.4364, + "step": 261 + }, + { + "epoch": 0.5, + "learning_rate": 0.00038792213063025484, + "loss": 0.5821, + "step": 262 + }, + { + "epoch": 0.51, + "learning_rate": 0.0003877741036163547, + "loss": 0.5393, + "step": 263 + }, + { + "epoch": 0.51, + "learning_rate": 0.0003876252035985804, + "loss": 0.5373, + "step": 264 + }, + { + "epoch": 0.51, + "learning_rate": 0.0003874754312692013, + "loss": 0.6021, + "step": 265 + }, + { + "epoch": 0.51, + "learning_rate": 0.0003873247873245426, + "loss": 0.4549, + "step": 266 + }, + { + "epoch": 0.51, + "learning_rate": 0.0003871732724649817, + "loss": 0.5994, + "step": 267 + }, + { + "epoch": 0.52, + "learning_rate": 0.0003870208873949453, + "loss": 0.4764, + "step": 268 + }, + { + "epoch": 0.52, + "learning_rate": 0.00038686763282290556, + "loss": 0.4311, + "step": 269 + }, + { + "epoch": 0.52, + "learning_rate": 0.0003867135094613774, + "loss": 0.5462, + "step": 270 + }, + { + "epoch": 0.52, + "learning_rate": 0.0003865585180269148, + "loss": 0.5006, + "step": 271 + }, + { + "epoch": 0.52, + "learning_rate": 0.0003864026592401076, + "loss": 0.5347, + "step": 272 + }, + { + "epoch": 0.53, + "learning_rate": 0.00038624593382557835, + "loss": 0.5242, + "step": 273 + }, + { + "epoch": 0.53, + "learning_rate": 0.00038608834251197856, + "loss": 0.5005, + "step": 274 + }, + { + "epoch": 0.53, + "learning_rate": 0.00038592988603198554, + "loss": 0.5436, + "step": 275 + }, + { + "epoch": 0.53, + "learning_rate": 0.000385770565122299, + "loss": 0.4658, + "step": 276 + }, + { + "epoch": 0.53, + "learning_rate": 0.0003856103805236375, + "loss": 0.5273, + "step": 277 + }, + { + "epoch": 0.54, + "learning_rate": 0.00038544933298073516, + "loss": 0.436, + "step": 278 + }, + { + "epoch": 0.54, + "learning_rate": 0.00038528742324233804, + "loss": 0.4785, + "step": 279 + }, + { + "epoch": 0.54, + "learning_rate": 0.00038512465206120086, + "loss": 0.5366, + "step": 280 + }, + { + "epoch": 0.54, + "learning_rate": 0.00038496102019408324, + "loss": 0.4448, + "step": 281 + }, + { + "epoch": 0.54, + "learning_rate": 0.00038479652840174637, + "loss": 0.5132, + "step": 282 + }, + { + "epoch": 0.54, + "learning_rate": 0.00038463117744894955, + "loss": 0.7918, + "step": 283 + }, + { + "epoch": 0.55, + "learning_rate": 0.00038446496810444627, + "loss": 0.5309, + "step": 284 + }, + { + "epoch": 0.55, + "learning_rate": 0.00038429790114098114, + "loss": 0.5316, + "step": 285 + }, + { + "epoch": 0.55, + "learning_rate": 0.00038412997733528576, + "loss": 0.4611, + "step": 286 + }, + { + "epoch": 0.55, + "learning_rate": 0.00038396119746807563, + "loss": 0.4609, + "step": 287 + }, + { + "epoch": 0.55, + "learning_rate": 0.00038379156232404613, + "loss": 0.5821, + "step": 288 + }, + { + "epoch": 0.56, + "learning_rate": 0.0003836210726918691, + "loss": 0.5883, + "step": 289 + }, + { + "epoch": 0.56, + "learning_rate": 0.0003834497293641889, + "loss": 0.5012, + "step": 290 + }, + { + "epoch": 0.56, + "learning_rate": 0.00038327753313761913, + "loss": 0.4457, + "step": 291 + }, + { + "epoch": 0.56, + "learning_rate": 0.00038310448481273867, + "loss": 0.4851, + "step": 292 + }, + { + "epoch": 0.56, + "learning_rate": 0.00038293058519408787, + "loss": 0.5622, + "step": 293 + }, + { + "epoch": 0.57, + "learning_rate": 0.00038275583509016507, + "loss": 0.5703, + "step": 294 + }, + { + "epoch": 0.57, + "learning_rate": 0.00038258023531342265, + "loss": 0.5718, + "step": 295 + }, + { + "epoch": 0.57, + "learning_rate": 0.0003824037866802632, + "loss": 0.5183, + "step": 296 + }, + { + "epoch": 0.57, + "learning_rate": 0.00038222649001103614, + "loss": 0.5085, + "step": 297 + }, + { + "epoch": 0.57, + "learning_rate": 0.00038204834613003323, + "loss": 0.5388, + "step": 298 + }, + { + "epoch": 0.58, + "learning_rate": 0.00038186935586548537, + "loss": 0.5425, + "step": 299 + }, + { + "epoch": 0.58, + "learning_rate": 0.0003816895200495584, + "loss": 0.447, + "step": 300 + }, + { + "epoch": 0.58, + "learning_rate": 0.0003815088395183493, + "loss": 0.5541, + "step": 301 + }, + { + "epoch": 0.58, + "learning_rate": 0.00038132731511188227, + "loss": 0.5518, + "step": 302 + }, + { + "epoch": 0.58, + "learning_rate": 0.000381144947674105, + "loss": 0.5074, + "step": 303 + }, + { + "epoch": 0.59, + "learning_rate": 0.0003809617380528847, + "loss": 0.5134, + "step": 304 + }, + { + "epoch": 0.59, + "learning_rate": 0.0003807776871000037, + "loss": 0.4599, + "step": 305 + }, + { + "epoch": 0.59, + "learning_rate": 0.0003805927956711562, + "loss": 0.5838, + "step": 306 + }, + { + "epoch": 0.59, + "learning_rate": 0.00038040706462594395, + "loss": 0.5216, + "step": 307 + }, + { + "epoch": 0.59, + "learning_rate": 0.00038022049482787216, + "loss": 0.5323, + "step": 308 + }, + { + "epoch": 0.6, + "learning_rate": 0.0003800330871443456, + "loss": 0.5681, + "step": 309 + }, + { + "epoch": 0.6, + "learning_rate": 0.00037984484244666446, + "loss": 0.4172, + "step": 310 + }, + { + "epoch": 0.6, + "learning_rate": 0.0003796557616100207, + "loss": 0.4958, + "step": 311 + }, + { + "epoch": 0.6, + "learning_rate": 0.0003794658455134934, + "loss": 0.662, + "step": 312 + }, + { + "epoch": 0.6, + "learning_rate": 0.0003792750950400451, + "loss": 0.5832, + "step": 313 + }, + { + "epoch": 0.6, + "learning_rate": 0.0003790835110765174, + "loss": 0.4271, + "step": 314 + }, + { + "epoch": 0.61, + "learning_rate": 0.0003788910945136271, + "loss": 0.4842, + "step": 315 + }, + { + "epoch": 0.61, + "learning_rate": 0.00037869784624596186, + "loss": 0.4656, + "step": 316 + }, + { + "epoch": 0.61, + "learning_rate": 0.00037850376717197626, + "loss": 0.4981, + "step": 317 + }, + { + "epoch": 0.61, + "learning_rate": 0.00037830885819398733, + "loss": 0.5162, + "step": 318 + }, + { + "epoch": 0.61, + "learning_rate": 0.00037811312021817067, + "loss": 0.652, + "step": 319 + }, + { + "epoch": 0.62, + "learning_rate": 0.0003779165541545558, + "loss": 0.5104, + "step": 320 + }, + { + "epoch": 0.62, + "learning_rate": 0.0003777191609170225, + "loss": 0.4971, + "step": 321 + }, + { + "epoch": 0.62, + "learning_rate": 0.0003775209414232962, + "loss": 0.4871, + "step": 322 + }, + { + "epoch": 0.62, + "learning_rate": 0.0003773218965949436, + "loss": 0.5226, + "step": 323 + }, + { + "epoch": 0.62, + "learning_rate": 0.00037712202735736884, + "loss": 0.4823, + "step": 324 + }, + { + "epoch": 0.63, + "learning_rate": 0.0003769213346398087, + "loss": 0.497, + "step": 325 + }, + { + "epoch": 0.63, + "learning_rate": 0.0003767198193753286, + "loss": 0.5976, + "step": 326 + }, + { + "epoch": 0.63, + "learning_rate": 0.0003765174825008181, + "loss": 0.4532, + "step": 327 + }, + { + "epoch": 0.63, + "learning_rate": 0.0003763143249569868, + "loss": 0.5236, + "step": 328 + }, + { + "epoch": 0.63, + "learning_rate": 0.00037611034768835947, + "loss": 0.6513, + "step": 329 + }, + { + "epoch": 0.64, + "learning_rate": 0.00037590555164327224, + "loss": 0.5686, + "step": 330 + }, + { + "epoch": 0.64, + "learning_rate": 0.00037569993777386774, + "loss": 0.456, + "step": 331 + }, + { + "epoch": 0.64, + "learning_rate": 0.0003754935070360909, + "loss": 0.5181, + "step": 332 + }, + { + "epoch": 0.64, + "learning_rate": 0.0003752862603896846, + "loss": 0.4765, + "step": 333 + }, + { + "epoch": 0.64, + "learning_rate": 0.00037507819879818477, + "loss": 0.5363, + "step": 334 + }, + { + "epoch": 0.65, + "learning_rate": 0.00037486932322891646, + "loss": 0.4584, + "step": 335 + }, + { + "epoch": 0.65, + "learning_rate": 0.00037465963465298886, + "loss": 0.5428, + "step": 336 + }, + { + "epoch": 0.65, + "learning_rate": 0.0003744491340452913, + "loss": 0.3927, + "step": 337 + }, + { + "epoch": 0.65, + "learning_rate": 0.0003742378223844882, + "loss": 0.5478, + "step": 338 + }, + { + "epoch": 0.65, + "learning_rate": 0.0003740257006530147, + "loss": 0.469, + "step": 339 + }, + { + "epoch": 0.65, + "learning_rate": 0.00037381276983707246, + "loss": 0.5169, + "step": 340 + }, + { + "epoch": 0.66, + "learning_rate": 0.00037359903092662434, + "loss": 0.4797, + "step": 341 + }, + { + "epoch": 0.66, + "learning_rate": 0.00037338448491539054, + "loss": 0.5315, + "step": 342 + }, + { + "epoch": 0.66, + "learning_rate": 0.00037316913280084353, + "loss": 0.4422, + "step": 343 + }, + { + "epoch": 0.66, + "learning_rate": 0.0003729529755842035, + "loss": 0.4426, + "step": 344 + }, + { + "epoch": 0.66, + "learning_rate": 0.0003727360142704337, + "loss": 0.4718, + "step": 345 + }, + { + "epoch": 0.67, + "learning_rate": 0.0003725182498682361, + "loss": 0.5585, + "step": 346 + }, + { + "epoch": 0.67, + "learning_rate": 0.0003722996833900459, + "loss": 0.4775, + "step": 347 + }, + { + "epoch": 0.67, + "learning_rate": 0.0003720803158520279, + "loss": 0.6014, + "step": 348 + }, + { + "epoch": 0.67, + "learning_rate": 0.00037186014827407076, + "loss": 0.5117, + "step": 349 + }, + { + "epoch": 0.67, + "learning_rate": 0.0003716391816797829, + "loss": 0.5404, + "step": 350 + }, + { + "epoch": 0.68, + "learning_rate": 0.0003714174170964876, + "loss": 0.527, + "step": 351 + }, + { + "epoch": 0.68, + "learning_rate": 0.00037119485555521796, + "loss": 0.4555, + "step": 352 + }, + { + "epoch": 0.68, + "learning_rate": 0.00037097149809071255, + "loss": 0.5372, + "step": 353 + }, + { + "epoch": 0.68, + "learning_rate": 0.00037074734574141016, + "loss": 0.5377, + "step": 354 + }, + { + "epoch": 0.68, + "learning_rate": 0.0003705223995494454, + "loss": 0.4925, + "step": 355 + }, + { + "epoch": 0.69, + "learning_rate": 0.00037029666056064345, + "loss": 0.482, + "step": 356 + }, + { + "epoch": 0.69, + "learning_rate": 0.00037007012982451546, + "loss": 0.5235, + "step": 357 + }, + { + "epoch": 0.69, + "learning_rate": 0.00036984280839425356, + "loss": 0.4957, + "step": 358 + }, + { + "epoch": 0.69, + "learning_rate": 0.000369614697326726, + "loss": 0.5379, + "step": 359 + }, + { + "epoch": 0.69, + "learning_rate": 0.0003693857976824721, + "loss": 0.4653, + "step": 360 + }, + { + "epoch": 0.7, + "learning_rate": 0.00036915611052569785, + "loss": 0.469, + "step": 361 + }, + { + "epoch": 0.7, + "learning_rate": 0.0003689256369242702, + "loss": 0.5618, + "step": 362 + }, + { + "epoch": 0.7, + "learning_rate": 0.0003686943779497124, + "loss": 0.4459, + "step": 363 + }, + { + "epoch": 0.7, + "learning_rate": 0.0003684623346771995, + "loss": 0.5606, + "step": 364 + }, + { + "epoch": 0.7, + "learning_rate": 0.0003682295081855524, + "loss": 0.4368, + "step": 365 + }, + { + "epoch": 0.7, + "learning_rate": 0.00036799589955723375, + "loss": 0.4168, + "step": 366 + }, + { + "epoch": 0.71, + "learning_rate": 0.00036776150987834243, + "loss": 0.4664, + "step": 367 + }, + { + "epoch": 0.71, + "learning_rate": 0.00036752634023860846, + "loss": 0.4737, + "step": 368 + }, + { + "epoch": 0.71, + "learning_rate": 0.0003672903917313883, + "loss": 0.4247, + "step": 369 + }, + { + "epoch": 0.71, + "learning_rate": 0.00036705366545365935, + "loss": 0.5677, + "step": 370 + }, + { + "epoch": 0.71, + "learning_rate": 0.00036681616250601505, + "loss": 0.5441, + "step": 371 + }, + { + "epoch": 0.72, + "learning_rate": 0.0003665778839926599, + "loss": 0.6247, + "step": 372 + }, + { + "epoch": 0.72, + "learning_rate": 0.00036633883102140405, + "loss": 0.5217, + "step": 373 + }, + { + "epoch": 0.72, + "learning_rate": 0.0003660990047036584, + "loss": 0.4651, + "step": 374 + }, + { + "epoch": 0.72, + "learning_rate": 0.0003658584061544291, + "loss": 0.4648, + "step": 375 + }, + { + "epoch": 0.72, + "learning_rate": 0.0003656170364923128, + "loss": 0.6048, + "step": 376 + }, + { + "epoch": 0.73, + "learning_rate": 0.00036537489683949114, + "loss": 0.4515, + "step": 377 + }, + { + "epoch": 0.73, + "learning_rate": 0.0003651319883217255, + "loss": 0.5096, + "step": 378 + }, + { + "epoch": 0.73, + "learning_rate": 0.00036488831206835207, + "loss": 0.4231, + "step": 379 + }, + { + "epoch": 0.73, + "learning_rate": 0.00036464386921227637, + "loss": 0.4903, + "step": 380 + }, + { + "epoch": 0.73, + "learning_rate": 0.00036439866088996796, + "loss": 0.5131, + "step": 381 + }, + { + "epoch": 0.74, + "learning_rate": 0.0003641526882414553, + "loss": 0.5986, + "step": 382 + }, + { + "epoch": 0.74, + "learning_rate": 0.0003639059524103203, + "loss": 0.6, + "step": 383 + }, + { + "epoch": 0.74, + "learning_rate": 0.0003636584545436931, + "loss": 0.5216, + "step": 384 + }, + { + "epoch": 0.74, + "learning_rate": 0.0003634101957922468, + "loss": 0.5144, + "step": 385 + }, + { + "epoch": 0.74, + "learning_rate": 0.00036316117731019184, + "loss": 0.4963, + "step": 386 + }, + { + "epoch": 0.75, + "learning_rate": 0.0003629114002552711, + "loss": 0.5657, + "step": 387 + }, + { + "epoch": 0.75, + "learning_rate": 0.00036266086578875384, + "loss": 0.5028, + "step": 388 + }, + { + "epoch": 0.75, + "learning_rate": 0.0003624095750754311, + "loss": 0.573, + "step": 389 + }, + { + "epoch": 0.75, + "learning_rate": 0.00036215752928360967, + "loss": 0.5199, + "step": 390 + }, + { + "epoch": 0.75, + "learning_rate": 0.0003619047295851068, + "loss": 0.656, + "step": 391 + }, + { + "epoch": 0.75, + "learning_rate": 0.00036165117715524506, + "loss": 0.5129, + "step": 392 + }, + { + "epoch": 0.76, + "learning_rate": 0.00036139687317284647, + "loss": 0.3945, + "step": 393 + }, + { + "epoch": 0.76, + "learning_rate": 0.0003611418188202271, + "loss": 0.5318, + "step": 394 + }, + { + "epoch": 0.76, + "learning_rate": 0.00036088601528319196, + "loss": 0.5344, + "step": 395 + }, + { + "epoch": 0.76, + "learning_rate": 0.00036062946375102885, + "loss": 0.5407, + "step": 396 + }, + { + "epoch": 0.76, + "learning_rate": 0.0003603721654165034, + "loss": 0.5364, + "step": 397 + }, + { + "epoch": 0.77, + "learning_rate": 0.00036011412147585306, + "loss": 0.5407, + "step": 398 + }, + { + "epoch": 0.77, + "learning_rate": 0.0003598553331287821, + "loss": 0.5999, + "step": 399 + }, + { + "epoch": 0.77, + "learning_rate": 0.0003595958015784555, + "loss": 0.624, + "step": 400 + }, + { + "epoch": 0.77, + "learning_rate": 0.00035933552803149354, + "loss": 0.5351, + "step": 401 + }, + { + "epoch": 0.77, + "learning_rate": 0.0003590745136979662, + "loss": 0.5196, + "step": 402 + }, + { + "epoch": 0.78, + "learning_rate": 0.00035881275979138765, + "loss": 0.5447, + "step": 403 + }, + { + "epoch": 0.78, + "learning_rate": 0.0003585502675287104, + "loss": 0.4908, + "step": 404 + }, + { + "epoch": 0.78, + "learning_rate": 0.00035828703813031986, + "loss": 0.5172, + "step": 405 + }, + { + "epoch": 0.78, + "learning_rate": 0.00035802307282002834, + "loss": 0.5923, + "step": 406 + }, + { + "epoch": 0.78, + "learning_rate": 0.0003577583728250699, + "loss": 0.568, + "step": 407 + }, + { + "epoch": 0.79, + "learning_rate": 0.00035749293937609395, + "loss": 0.4618, + "step": 408 + }, + { + "epoch": 0.79, + "learning_rate": 0.0003572267737071601, + "loss": 0.5351, + "step": 409 + }, + { + "epoch": 0.79, + "learning_rate": 0.0003569598770557322, + "loss": 0.5285, + "step": 410 + }, + { + "epoch": 0.79, + "learning_rate": 0.00035669225066267256, + "loss": 0.4571, + "step": 411 + }, + { + "epoch": 0.79, + "learning_rate": 0.00035642389577223625, + "loss": 0.4214, + "step": 412 + }, + { + "epoch": 0.8, + "learning_rate": 0.0003561548136320653, + "loss": 0.5393, + "step": 413 + }, + { + "epoch": 0.8, + "learning_rate": 0.0003558850054931828, + "loss": 0.549, + "step": 414 + }, + { + "epoch": 0.8, + "learning_rate": 0.00035561447260998714, + "loss": 0.4824, + "step": 415 + }, + { + "epoch": 0.8, + "learning_rate": 0.00035534321624024656, + "loss": 0.6244, + "step": 416 + }, + { + "epoch": 0.8, + "learning_rate": 0.00035507123764509245, + "loss": 0.5436, + "step": 417 + }, + { + "epoch": 0.8, + "learning_rate": 0.0003547985380890144, + "loss": 0.5198, + "step": 418 + }, + { + "epoch": 0.81, + "learning_rate": 0.00035452511883985366, + "loss": 0.5979, + "step": 419 + }, + { + "epoch": 0.81, + "learning_rate": 0.00035425098116879754, + "loss": 0.4158, + "step": 420 + }, + { + "epoch": 0.81, + "learning_rate": 0.00035397612635037356, + "loss": 0.5125, + "step": 421 + }, + { + "epoch": 0.81, + "learning_rate": 0.00035370055566244334, + "loss": 0.4699, + "step": 422 + }, + { + "epoch": 0.81, + "learning_rate": 0.0003534242703861966, + "loss": 0.5553, + "step": 423 + }, + { + "epoch": 0.82, + "learning_rate": 0.00035314727180614573, + "loss": 0.5969, + "step": 424 + }, + { + "epoch": 0.82, + "learning_rate": 0.00035286956121011897, + "loss": 0.456, + "step": 425 + }, + { + "epoch": 0.82, + "learning_rate": 0.0003525911398892552, + "loss": 0.5195, + "step": 426 + }, + { + "epoch": 0.82, + "learning_rate": 0.0003523120091379975, + "loss": 0.5187, + "step": 427 + }, + { + "epoch": 0.82, + "learning_rate": 0.00035203217025408726, + "loss": 0.5443, + "step": 428 + }, + { + "epoch": 0.83, + "learning_rate": 0.0003517516245385582, + "loss": 0.4476, + "step": 429 + }, + { + "epoch": 0.83, + "learning_rate": 0.0003514703732957301, + "loss": 0.5757, + "step": 430 + }, + { + "epoch": 0.83, + "learning_rate": 0.00035118841783320304, + "loss": 0.5129, + "step": 431 + }, + { + "epoch": 0.83, + "learning_rate": 0.00035090575946185114, + "loss": 0.6354, + "step": 432 + }, + { + "epoch": 0.83, + "learning_rate": 0.00035062239949581645, + "loss": 0.4065, + "step": 433 + }, + { + "epoch": 0.84, + "learning_rate": 0.000350338339252503, + "loss": 0.5472, + "step": 434 + }, + { + "epoch": 0.84, + "learning_rate": 0.00035005358005257045, + "loss": 0.5424, + "step": 435 + }, + { + "epoch": 0.84, + "learning_rate": 0.00034976812321992816, + "loss": 0.6127, + "step": 436 + }, + { + "epoch": 0.84, + "learning_rate": 0.00034948197008172877, + "loss": 0.63, + "step": 437 + }, + { + "epoch": 0.84, + "learning_rate": 0.0003491951219683625, + "loss": 0.413, + "step": 438 + }, + { + "epoch": 0.85, + "learning_rate": 0.00034890758021345034, + "loss": 0.5435, + "step": 439 + }, + { + "epoch": 0.85, + "learning_rate": 0.00034861934615383844, + "loss": 0.5433, + "step": 440 + }, + { + "epoch": 0.85, + "learning_rate": 0.00034833042112959153, + "loss": 0.4763, + "step": 441 + }, + { + "epoch": 0.85, + "learning_rate": 0.00034804080648398667, + "loss": 0.5727, + "step": 442 + }, + { + "epoch": 0.85, + "learning_rate": 0.00034775050356350727, + "loss": 0.5392, + "step": 443 + }, + { + "epoch": 0.85, + "learning_rate": 0.00034745951371783666, + "loss": 0.4981, + "step": 444 + }, + { + "epoch": 0.86, + "learning_rate": 0.0003471678382998518, + "loss": 0.5516, + "step": 445 + }, + { + "epoch": 0.86, + "learning_rate": 0.00034687547866561703, + "loss": 0.4965, + "step": 446 + }, + { + "epoch": 0.86, + "learning_rate": 0.0003465824361743779, + "loss": 0.4982, + "step": 447 + }, + { + "epoch": 0.86, + "learning_rate": 0.0003462887121885544, + "loss": 0.5619, + "step": 448 + }, + { + "epoch": 0.86, + "learning_rate": 0.0003459943080737353, + "loss": 0.5273, + "step": 449 + }, + { + "epoch": 0.87, + "learning_rate": 0.00034569922519867133, + "loss": 0.517, + "step": 450 + }, + { + "epoch": 0.87, + "learning_rate": 0.00034540346493526876, + "loss": 0.4874, + "step": 451 + }, + { + "epoch": 0.87, + "learning_rate": 0.0003451070286585833, + "loss": 0.5966, + "step": 452 + }, + { + "epoch": 0.87, + "learning_rate": 0.0003448099177468137, + "loss": 0.4487, + "step": 453 + }, + { + "epoch": 0.87, + "learning_rate": 0.0003445121335812951, + "loss": 0.5091, + "step": 454 + }, + { + "epoch": 0.88, + "learning_rate": 0.0003442136775464929, + "loss": 0.407, + "step": 455 + }, + { + "epoch": 0.88, + "learning_rate": 0.0003439145510299958, + "loss": 0.6327, + "step": 456 + }, + { + "epoch": 0.88, + "learning_rate": 0.00034361475542251025, + "loss": 0.4217, + "step": 457 + }, + { + "epoch": 0.88, + "learning_rate": 0.0003433142921178531, + "loss": 0.6102, + "step": 458 + }, + { + "epoch": 0.88, + "learning_rate": 0.0003430131625129456, + "loss": 0.5556, + "step": 459 + }, + { + "epoch": 0.89, + "learning_rate": 0.00034271136800780673, + "loss": 0.4986, + "step": 460 + }, + { + "epoch": 0.89, + "learning_rate": 0.0003424089100055467, + "loss": 0.5406, + "step": 461 + }, + { + "epoch": 0.89, + "learning_rate": 0.00034210578991236056, + "loss": 0.5881, + "step": 462 + }, + { + "epoch": 0.89, + "learning_rate": 0.00034180200913752157, + "loss": 0.4869, + "step": 463 + }, + { + "epoch": 0.89, + "learning_rate": 0.00034149756909337454, + "loss": 0.5626, + "step": 464 + }, + { + "epoch": 0.9, + "learning_rate": 0.0003411924711953295, + "loss": 0.564, + "step": 465 + }, + { + "epoch": 0.9, + "learning_rate": 0.00034088671686185486, + "loss": 0.6272, + "step": 466 + }, + { + "epoch": 0.9, + "learning_rate": 0.0003405803075144711, + "loss": 0.4643, + "step": 467 + }, + { + "epoch": 0.9, + "learning_rate": 0.0003402732445777438, + "loss": 0.5435, + "step": 468 + }, + { + "epoch": 0.9, + "learning_rate": 0.00033996552947927744, + "loss": 0.5844, + "step": 469 + }, + { + "epoch": 0.91, + "learning_rate": 0.0003396571636497084, + "loss": 0.5362, + "step": 470 + }, + { + "epoch": 0.91, + "learning_rate": 0.00033934814852269865, + "loss": 0.5607, + "step": 471 + }, + { + "epoch": 0.91, + "learning_rate": 0.0003390384855349285, + "loss": 0.4836, + "step": 472 + }, + { + "epoch": 0.91, + "learning_rate": 0.00033872817612609065, + "loss": 0.6555, + "step": 473 + }, + { + "epoch": 0.91, + "learning_rate": 0.00033841722173888315, + "loss": 0.4784, + "step": 474 + }, + { + "epoch": 0.91, + "learning_rate": 0.00033810562381900253, + "loss": 0.5583, + "step": 475 + }, + { + "epoch": 0.92, + "learning_rate": 0.00033779338381513736, + "loss": 0.4679, + "step": 476 + }, + { + "epoch": 0.92, + "learning_rate": 0.0003374805031789613, + "loss": 0.5325, + "step": 477 + }, + { + "epoch": 0.92, + "learning_rate": 0.00033716698336512654, + "loss": 0.6601, + "step": 478 + }, + { + "epoch": 0.92, + "learning_rate": 0.000336852825831257, + "loss": 0.4838, + "step": 479 + }, + { + "epoch": 0.92, + "learning_rate": 0.0003365380320379414, + "loss": 0.5588, + "step": 480 + }, + { + "epoch": 0.93, + "learning_rate": 0.00033622260344872665, + "loss": 0.4596, + "step": 481 + }, + { + "epoch": 0.93, + "learning_rate": 0.0003359065415301108, + "loss": 0.5228, + "step": 482 + }, + { + "epoch": 0.93, + "learning_rate": 0.00033558984775153663, + "loss": 0.5316, + "step": 483 + }, + { + "epoch": 0.93, + "learning_rate": 0.00033527252358538437, + "loss": 0.4761, + "step": 484 + }, + { + "epoch": 0.93, + "learning_rate": 0.0003349545705069653, + "loss": 0.5254, + "step": 485 + }, + { + "epoch": 0.94, + "learning_rate": 0.0003346359899945144, + "loss": 0.4786, + "step": 486 + }, + { + "epoch": 0.94, + "learning_rate": 0.00033431678352918384, + "loss": 0.4302, + "step": 487 + }, + { + "epoch": 0.94, + "learning_rate": 0.0003339969525950361, + "loss": 0.4914, + "step": 488 + }, + { + "epoch": 0.94, + "learning_rate": 0.00033367649867903663, + "loss": 0.4102, + "step": 489 + }, + { + "epoch": 0.94, + "learning_rate": 0.0003333554232710477, + "loss": 0.4698, + "step": 490 + }, + { + "epoch": 0.95, + "learning_rate": 0.0003330337278638207, + "loss": 0.4454, + "step": 491 + }, + { + "epoch": 0.95, + "learning_rate": 0.00033271141395298964, + "loss": 0.4648, + "step": 492 + }, + { + "epoch": 0.95, + "learning_rate": 0.00033238848303706415, + "loss": 0.4616, + "step": 493 + }, + { + "epoch": 0.95, + "learning_rate": 0.00033206493661742237, + "loss": 0.4861, + "step": 494 + }, + { + "epoch": 0.95, + "learning_rate": 0.00033174077619830416, + "loss": 0.4797, + "step": 495 + }, + { + "epoch": 0.96, + "learning_rate": 0.00033141600328680373, + "loss": 0.5104, + "step": 496 + }, + { + "epoch": 0.96, + "learning_rate": 0.00033109061939286336, + "loss": 0.5712, + "step": 497 + }, + { + "epoch": 0.96, + "learning_rate": 0.00033076462602926553, + "loss": 0.5425, + "step": 498 + }, + { + "epoch": 0.96, + "learning_rate": 0.00033043802471162636, + "loss": 0.6156, + "step": 499 + }, + { + "epoch": 0.96, + "learning_rate": 0.0003301108169583887, + "loss": 0.4282, + "step": 500 + }, + { + "epoch": 0.96, + "learning_rate": 0.0003297830042908146, + "loss": 0.4088, + "step": 501 + }, + { + "epoch": 0.97, + "learning_rate": 0.00032945458823297857, + "loss": 0.4866, + "step": 502 + }, + { + "epoch": 0.97, + "learning_rate": 0.0003291255703117605, + "loss": 0.5045, + "step": 503 + }, + { + "epoch": 0.97, + "learning_rate": 0.0003287959520568384, + "loss": 0.491, + "step": 504 + }, + { + "epoch": 0.97, + "learning_rate": 0.00032846573500068136, + "loss": 0.458, + "step": 505 + }, + { + "epoch": 0.97, + "learning_rate": 0.00032813492067854246, + "loss": 0.4508, + "step": 506 + }, + { + "epoch": 0.98, + "learning_rate": 0.0003278035106284516, + "loss": 0.4294, + "step": 507 + }, + { + "epoch": 0.98, + "learning_rate": 0.00032747150639120834, + "loss": 0.4834, + "step": 508 + }, + { + "epoch": 0.98, + "learning_rate": 0.00032713890951037477, + "loss": 0.3857, + "step": 509 + }, + { + "epoch": 0.98, + "learning_rate": 0.00032680572153226834, + "loss": 0.4072, + "step": 510 + }, + { + "epoch": 0.98, + "learning_rate": 0.0003264719440059545, + "loss": 0.4028, + "step": 511 + }, + { + "epoch": 0.99, + "learning_rate": 0.00032613757848323977, + "loss": 0.3789, + "step": 512 + }, + { + "epoch": 0.99, + "learning_rate": 0.00032580262651866446, + "loss": 0.4944, + "step": 513 + }, + { + "epoch": 0.99, + "learning_rate": 0.0003254670896694952, + "loss": 0.4259, + "step": 514 + }, + { + "epoch": 0.99, + "learning_rate": 0.00032513096949571805, + "loss": 0.5037, + "step": 515 + }, + { + "epoch": 0.99, + "learning_rate": 0.00032479426756003093, + "loss": 0.5857, + "step": 516 + }, + { + "epoch": 1.0, + "learning_rate": 0.0003244569854278366, + "loss": 0.5407, + "step": 517 + }, + { + "epoch": 1.0, + "learning_rate": 0.00032411912466723524, + "loss": 0.499, + "step": 518 + }, + { + "epoch": 1.0, + "learning_rate": 0.0003237806868490172, + "loss": 0.4359, + "step": 519 + }, + { + "epoch": 1.0, + "learning_rate": 0.00032344167354665573, + "loss": 0.4374, + "step": 520 + }, + { + "epoch": 1.0, + "learning_rate": 0.0003231020863362997, + "loss": 0.4172, + "step": 521 + }, + { + "epoch": 1.01, + "learning_rate": 0.000322761926796766, + "loss": 0.4451, + "step": 522 + }, + { + "epoch": 1.01, + "learning_rate": 0.0003224211965095326, + "loss": 0.4, + "step": 523 + }, + { + "epoch": 1.01, + "learning_rate": 0.0003220798970587309, + "loss": 0.4009, + "step": 524 + }, + { + "epoch": 1.01, + "learning_rate": 0.0003217380300311386, + "loss": 0.3966, + "step": 525 + }, + { + "epoch": 1.01, + "learning_rate": 0.000321395597016172, + "loss": 0.4255, + "step": 526 + }, + { + "epoch": 1.01, + "learning_rate": 0.00032105259960587895, + "loss": 0.4707, + "step": 527 + }, + { + "epoch": 1.02, + "learning_rate": 0.00032070903939493124, + "loss": 0.5313, + "step": 528 + }, + { + "epoch": 1.02, + "learning_rate": 0.0003203649179806172, + "loss": 0.3596, + "step": 529 + }, + { + "epoch": 1.02, + "learning_rate": 0.0003200202369628345, + "loss": 0.5223, + "step": 530 + }, + { + "epoch": 1.02, + "learning_rate": 0.00031967499794408234, + "loss": 0.4146, + "step": 531 + }, + { + "epoch": 1.02, + "learning_rate": 0.00031932920252945423, + "loss": 0.4328, + "step": 532 + }, + { + "epoch": 1.03, + "learning_rate": 0.0003189828523266306, + "loss": 0.4258, + "step": 533 + }, + { + "epoch": 1.03, + "learning_rate": 0.00031863594894587105, + "loss": 0.4457, + "step": 534 + }, + { + "epoch": 1.03, + "learning_rate": 0.0003182884940000072, + "loss": 0.5249, + "step": 535 + }, + { + "epoch": 1.03, + "learning_rate": 0.0003179404891044348, + "loss": 0.4751, + "step": 536 + }, + { + "epoch": 1.03, + "learning_rate": 0.00031759193587710676, + "loss": 0.5378, + "step": 537 + }, + { + "epoch": 1.04, + "learning_rate": 0.00031724283593852497, + "loss": 0.634, + "step": 538 + }, + { + "epoch": 1.04, + "learning_rate": 0.00031689319091173326, + "loss": 0.4298, + "step": 539 + }, + { + "epoch": 1.04, + "learning_rate": 0.00031654300242230977, + "loss": 0.5469, + "step": 540 + }, + { + "epoch": 1.04, + "learning_rate": 0.00031619227209835917, + "loss": 0.5153, + "step": 541 + }, + { + "epoch": 1.04, + "learning_rate": 0.0003158410015705053, + "loss": 0.4144, + "step": 542 + }, + { + "epoch": 1.05, + "learning_rate": 0.0003154891924718837, + "loss": 0.6041, + "step": 543 + }, + { + "epoch": 1.05, + "learning_rate": 0.0003151368464381335, + "loss": 0.4891, + "step": 544 + }, + { + "epoch": 1.05, + "learning_rate": 0.0003147839651073904, + "loss": 0.5258, + "step": 545 + }, + { + "epoch": 1.05, + "learning_rate": 0.00031443055012027874, + "loss": 0.4351, + "step": 546 + }, + { + "epoch": 1.05, + "learning_rate": 0.000314076603119904, + "loss": 0.4556, + "step": 547 + }, + { + "epoch": 1.06, + "learning_rate": 0.00031372212575184514, + "loss": 0.5445, + "step": 548 + }, + { + "epoch": 1.06, + "learning_rate": 0.00031336711966414675, + "loss": 0.5585, + "step": 549 + }, + { + "epoch": 1.06, + "learning_rate": 0.0003130115865073117, + "loss": 0.367, + "step": 550 + }, + { + "epoch": 1.06, + "learning_rate": 0.0003126555279342933, + "loss": 0.4877, + "step": 551 + }, + { + "epoch": 1.06, + "learning_rate": 0.0003122989456004876, + "loss": 0.4335, + "step": 552 + }, + { + "epoch": 1.07, + "learning_rate": 0.0003119418411637258, + "loss": 0.4383, + "step": 553 + }, + { + "epoch": 1.07, + "learning_rate": 0.0003115842162842663, + "loss": 0.4508, + "step": 554 + }, + { + "epoch": 1.07, + "learning_rate": 0.00031122607262478743, + "loss": 0.4631, + "step": 555 + }, + { + "epoch": 1.07, + "learning_rate": 0.0003108674118503793, + "loss": 0.3496, + "step": 556 + }, + { + "epoch": 1.07, + "learning_rate": 0.0003105082356285361, + "loss": 0.4108, + "step": 557 + }, + { + "epoch": 1.08, + "learning_rate": 0.0003101485456291486, + "loss": 0.4877, + "step": 558 + }, + { + "epoch": 1.08, + "learning_rate": 0.00030978834352449614, + "loss": 0.3696, + "step": 559 + }, + { + "epoch": 1.08, + "learning_rate": 0.00030942763098923913, + "loss": 0.5138, + "step": 560 + }, + { + "epoch": 1.08, + "learning_rate": 0.00030906640970041084, + "loss": 0.5961, + "step": 561 + }, + { + "epoch": 1.08, + "learning_rate": 0.0003087046813374099, + "loss": 0.3824, + "step": 562 + }, + { + "epoch": 1.08, + "learning_rate": 0.00030834244758199276, + "loss": 0.4925, + "step": 563 + }, + { + "epoch": 1.09, + "learning_rate": 0.000307979710118265, + "loss": 0.4511, + "step": 564 + }, + { + "epoch": 1.09, + "learning_rate": 0.00030761647063267457, + "loss": 0.4306, + "step": 565 + }, + { + "epoch": 1.09, + "learning_rate": 0.0003072527308140031, + "loss": 0.468, + "step": 566 + }, + { + "epoch": 1.09, + "learning_rate": 0.00030688849235335856, + "loss": 0.4842, + "step": 567 + }, + { + "epoch": 1.09, + "learning_rate": 0.0003065237569441671, + "loss": 0.4332, + "step": 568 + }, + { + "epoch": 1.1, + "learning_rate": 0.00030615852628216537, + "loss": 0.4637, + "step": 569 + }, + { + "epoch": 1.1, + "learning_rate": 0.0003057928020653925, + "loss": 0.6193, + "step": 570 + }, + { + "epoch": 1.1, + "learning_rate": 0.0003054265859941824, + "loss": 0.5033, + "step": 571 + }, + { + "epoch": 1.1, + "learning_rate": 0.00030505987977115555, + "loss": 0.4185, + "step": 572 + }, + { + "epoch": 1.1, + "learning_rate": 0.0003046926851012114, + "loss": 0.4211, + "step": 573 + }, + { + "epoch": 1.11, + "learning_rate": 0.0003043250036915201, + "loss": 0.5089, + "step": 574 + }, + { + "epoch": 1.11, + "learning_rate": 0.00030395683725151505, + "loss": 0.517, + "step": 575 + }, + { + "epoch": 1.11, + "learning_rate": 0.0003035881874928845, + "loss": 0.492, + "step": 576 + }, + { + "epoch": 1.11, + "learning_rate": 0.0003032190561295636, + "loss": 0.4535, + "step": 577 + }, + { + "epoch": 1.11, + "learning_rate": 0.0003028494448777269, + "loss": 0.3947, + "step": 578 + }, + { + "epoch": 1.12, + "learning_rate": 0.00030247935545577986, + "loss": 0.3125, + "step": 579 + }, + { + "epoch": 1.12, + "learning_rate": 0.0003021087895843511, + "loss": 0.3882, + "step": 580 + }, + { + "epoch": 1.12, + "learning_rate": 0.0003017377489862845, + "loss": 0.4802, + "step": 581 + }, + { + "epoch": 1.12, + "learning_rate": 0.00030136623538663083, + "loss": 0.4652, + "step": 582 + }, + { + "epoch": 1.12, + "learning_rate": 0.00030099425051263994, + "loss": 0.3816, + "step": 583 + }, + { + "epoch": 1.13, + "learning_rate": 0.0003006217960937529, + "loss": 0.4583, + "step": 584 + }, + { + "epoch": 1.13, + "learning_rate": 0.00030024887386159385, + "loss": 0.4568, + "step": 585 + }, + { + "epoch": 1.13, + "learning_rate": 0.00029987548554996174, + "loss": 0.3908, + "step": 586 + }, + { + "epoch": 1.13, + "learning_rate": 0.0002995016328948225, + "loss": 0.4235, + "step": 587 + }, + { + "epoch": 1.13, + "learning_rate": 0.00029912731763430075, + "loss": 0.4138, + "step": 588 + }, + { + "epoch": 1.13, + "learning_rate": 0.00029875254150867216, + "loss": 0.5838, + "step": 589 + }, + { + "epoch": 1.14, + "learning_rate": 0.0002983773062603548, + "loss": 0.462, + "step": 590 + }, + { + "epoch": 1.14, + "learning_rate": 0.00029800161363390145, + "loss": 0.4632, + "step": 591 + }, + { + "epoch": 1.14, + "learning_rate": 0.00029762546537599125, + "loss": 0.5898, + "step": 592 + }, + { + "epoch": 1.14, + "learning_rate": 0.0002972488632354218, + "loss": 0.4742, + "step": 593 + }, + { + "epoch": 1.14, + "learning_rate": 0.00029687180896310065, + "loss": 0.4579, + "step": 594 + }, + { + "epoch": 1.15, + "learning_rate": 0.0002964943043120378, + "loss": 0.5514, + "step": 595 + }, + { + "epoch": 1.15, + "learning_rate": 0.00029611635103733675, + "loss": 0.4304, + "step": 596 + }, + { + "epoch": 1.15, + "learning_rate": 0.0002957379508961871, + "loss": 0.4383, + "step": 597 + }, + { + "epoch": 1.15, + "learning_rate": 0.00029535910564785584, + "loss": 0.5327, + "step": 598 + }, + { + "epoch": 1.15, + "learning_rate": 0.00029497981705367933, + "loss": 0.4781, + "step": 599 + }, + { + "epoch": 1.16, + "learning_rate": 0.00029460008687705525, + "loss": 0.4178, + "step": 600 + }, + { + "epoch": 1.16, + "learning_rate": 0.0002942199168834342, + "loss": 0.3987, + "step": 601 + }, + { + "epoch": 1.16, + "learning_rate": 0.00029383930884031183, + "loss": 0.3861, + "step": 602 + }, + { + "epoch": 1.16, + "learning_rate": 0.00029345826451722005, + "loss": 0.5322, + "step": 603 + }, + { + "epoch": 1.16, + "learning_rate": 0.00029307678568571936, + "loss": 0.3997, + "step": 604 + }, + { + "epoch": 1.17, + "learning_rate": 0.0002926948741193903, + "loss": 0.4121, + "step": 605 + }, + { + "epoch": 1.17, + "learning_rate": 0.00029231253159382514, + "loss": 0.4931, + "step": 606 + }, + { + "epoch": 1.17, + "learning_rate": 0.00029192975988662017, + "loss": 0.4626, + "step": 607 + }, + { + "epoch": 1.17, + "learning_rate": 0.00029154656077736666, + "loss": 0.4441, + "step": 608 + }, + { + "epoch": 1.17, + "learning_rate": 0.0002911629360476432, + "loss": 0.3863, + "step": 609 + }, + { + "epoch": 1.18, + "learning_rate": 0.00029077888748100703, + "loss": 0.36, + "step": 610 + }, + { + "epoch": 1.18, + "learning_rate": 0.00029039441686298594, + "loss": 0.4246, + "step": 611 + }, + { + "epoch": 1.18, + "learning_rate": 0.0002900095259810702, + "loss": 0.4916, + "step": 612 + }, + { + "epoch": 1.18, + "learning_rate": 0.00028962421662470346, + "loss": 0.4896, + "step": 613 + }, + { + "epoch": 1.18, + "learning_rate": 0.00028923849058527535, + "loss": 0.4237, + "step": 614 + }, + { + "epoch": 1.18, + "learning_rate": 0.00028885234965611274, + "loss": 0.5727, + "step": 615 + }, + { + "epoch": 1.19, + "learning_rate": 0.00028846579563247116, + "loss": 0.5681, + "step": 616 + }, + { + "epoch": 1.19, + "learning_rate": 0.0002880788303115269, + "loss": 0.4383, + "step": 617 + }, + { + "epoch": 1.19, + "learning_rate": 0.00028769145549236845, + "loss": 0.4962, + "step": 618 + }, + { + "epoch": 1.19, + "learning_rate": 0.0002873036729759881, + "loss": 0.5472, + "step": 619 + }, + { + "epoch": 1.19, + "learning_rate": 0.0002869154845652738, + "loss": 0.5431, + "step": 620 + }, + { + "epoch": 1.2, + "learning_rate": 0.0002865268920650003, + "loss": 0.4152, + "step": 621 + }, + { + "epoch": 1.2, + "learning_rate": 0.0002861378972818211, + "loss": 0.3922, + "step": 622 + }, + { + "epoch": 1.2, + "learning_rate": 0.0002857485020242602, + "loss": 0.5129, + "step": 623 + }, + { + "epoch": 1.2, + "learning_rate": 0.0002853587081027034, + "loss": 0.4328, + "step": 624 + }, + { + "epoch": 1.2, + "learning_rate": 0.00028496851732938997, + "loss": 0.4431, + "step": 625 + }, + { + "epoch": 1.21, + "learning_rate": 0.0002845779315184042, + "loss": 0.4968, + "step": 626 + }, + { + "epoch": 1.21, + "learning_rate": 0.000284186952485667, + "loss": 0.5301, + "step": 627 + }, + { + "epoch": 1.21, + "learning_rate": 0.0002837955820489276, + "loss": 0.4332, + "step": 628 + }, + { + "epoch": 1.21, + "learning_rate": 0.0002834038220277546, + "loss": 0.4245, + "step": 629 + }, + { + "epoch": 1.21, + "learning_rate": 0.00028301167424352836, + "loss": 0.5057, + "step": 630 + }, + { + "epoch": 1.22, + "learning_rate": 0.00028261914051943166, + "loss": 0.4623, + "step": 631 + }, + { + "epoch": 1.22, + "learning_rate": 0.00028222622268044174, + "loss": 0.5452, + "step": 632 + }, + { + "epoch": 1.22, + "learning_rate": 0.00028183292255332164, + "loss": 0.5238, + "step": 633 + }, + { + "epoch": 1.22, + "learning_rate": 0.00028143924196661176, + "loss": 0.3966, + "step": 634 + }, + { + "epoch": 1.22, + "learning_rate": 0.0002810451827506214, + "loss": 0.35, + "step": 635 + }, + { + "epoch": 1.23, + "learning_rate": 0.00028065074673742007, + "loss": 0.4325, + "step": 636 + }, + { + "epoch": 1.23, + "learning_rate": 0.0002802559357608292, + "loss": 0.4854, + "step": 637 + }, + { + "epoch": 1.23, + "learning_rate": 0.00027986075165641343, + "loss": 0.4254, + "step": 638 + }, + { + "epoch": 1.23, + "learning_rate": 0.00027946519626147225, + "loss": 0.4614, + "step": 639 + }, + { + "epoch": 1.23, + "learning_rate": 0.00027906927141503125, + "loss": 0.3798, + "step": 640 + }, + { + "epoch": 1.23, + "learning_rate": 0.00027867297895783373, + "loss": 0.4742, + "step": 641 + }, + { + "epoch": 1.24, + "learning_rate": 0.0002782763207323322, + "loss": 0.4007, + "step": 642 + }, + { + "epoch": 1.24, + "learning_rate": 0.0002778792985826795, + "loss": 0.4383, + "step": 643 + }, + { + "epoch": 1.24, + "learning_rate": 0.0002774819143547206, + "loss": 0.4298, + "step": 644 + }, + { + "epoch": 1.24, + "learning_rate": 0.00027708416989598387, + "loss": 0.5178, + "step": 645 + }, + { + "epoch": 1.24, + "learning_rate": 0.0002766860670556722, + "loss": 0.3434, + "step": 646 + }, + { + "epoch": 1.25, + "learning_rate": 0.0002762876076846551, + "loss": 0.3862, + "step": 647 + }, + { + "epoch": 1.25, + "learning_rate": 0.00027588879363545934, + "loss": 0.4459, + "step": 648 + }, + { + "epoch": 1.25, + "learning_rate": 0.0002754896267622608, + "loss": 0.3934, + "step": 649 + }, + { + "epoch": 1.25, + "learning_rate": 0.00027509010892087565, + "loss": 0.4349, + "step": 650 + }, + { + "epoch": 1.25, + "learning_rate": 0.000274690241968752, + "loss": 0.4178, + "step": 651 + }, + { + "epoch": 1.26, + "learning_rate": 0.0002742900277649607, + "loss": 0.4151, + "step": 652 + }, + { + "epoch": 1.26, + "learning_rate": 0.0002738894681701874, + "loss": 0.3888, + "step": 653 + }, + { + "epoch": 1.26, + "learning_rate": 0.00027348856504672323, + "loss": 0.4214, + "step": 654 + }, + { + "epoch": 1.26, + "learning_rate": 0.0002730873202584567, + "loss": 0.519, + "step": 655 + }, + { + "epoch": 1.26, + "learning_rate": 0.00027268573567086477, + "loss": 0.5463, + "step": 656 + }, + { + "epoch": 1.27, + "learning_rate": 0.00027228381315100417, + "loss": 0.3367, + "step": 657 + }, + { + "epoch": 1.27, + "learning_rate": 0.00027188155456750256, + "loss": 0.4629, + "step": 658 + }, + { + "epoch": 1.27, + "learning_rate": 0.00027147896179055043, + "loss": 0.4456, + "step": 659 + }, + { + "epoch": 1.27, + "learning_rate": 0.0002710760366918917, + "loss": 0.4348, + "step": 660 + }, + { + "epoch": 1.27, + "learning_rate": 0.0002706727811448153, + "loss": 0.4505, + "step": 661 + }, + { + "epoch": 1.28, + "learning_rate": 0.0002702691970241468, + "loss": 0.5028, + "step": 662 + }, + { + "epoch": 1.28, + "learning_rate": 0.00026986528620623904, + "loss": 0.5257, + "step": 663 + }, + { + "epoch": 1.28, + "learning_rate": 0.00026946105056896403, + "loss": 0.4977, + "step": 664 + }, + { + "epoch": 1.28, + "learning_rate": 0.00026905649199170377, + "loss": 0.421, + "step": 665 + }, + { + "epoch": 1.28, + "learning_rate": 0.0002686516123553417, + "loss": 0.4931, + "step": 666 + }, + { + "epoch": 1.28, + "learning_rate": 0.00026824641354225397, + "loss": 0.5818, + "step": 667 + }, + { + "epoch": 1.29, + "learning_rate": 0.0002678408974363005, + "loss": 0.4211, + "step": 668 + }, + { + "epoch": 1.29, + "learning_rate": 0.00026743506592281674, + "loss": 0.5182, + "step": 669 + }, + { + "epoch": 1.29, + "learning_rate": 0.00026702892088860413, + "loss": 0.5591, + "step": 670 + }, + { + "epoch": 1.29, + "learning_rate": 0.0002666224642219221, + "loss": 0.5363, + "step": 671 + }, + { + "epoch": 1.29, + "learning_rate": 0.0002662156978124786, + "loss": 0.5866, + "step": 672 + }, + { + "epoch": 1.3, + "learning_rate": 0.0002658086235514218, + "loss": 0.422, + "step": 673 + }, + { + "epoch": 1.3, + "learning_rate": 0.0002654012433313312, + "loss": 0.5375, + "step": 674 + }, + { + "epoch": 1.3, + "learning_rate": 0.0002649935590462087, + "loss": 0.4752, + "step": 675 + }, + { + "epoch": 1.3, + "learning_rate": 0.00026458557259146986, + "loss": 0.4271, + "step": 676 + }, + { + "epoch": 1.3, + "learning_rate": 0.0002641772858639351, + "loss": 0.4843, + "step": 677 + }, + { + "epoch": 1.31, + "learning_rate": 0.00026376870076182086, + "loss": 0.4827, + "step": 678 + }, + { + "epoch": 1.31, + "learning_rate": 0.00026335981918473086, + "loss": 0.47, + "step": 679 + }, + { + "epoch": 1.31, + "learning_rate": 0.0002629506430336472, + "loss": 0.368, + "step": 680 + }, + { + "epoch": 1.31, + "learning_rate": 0.00026254117421092133, + "loss": 0.481, + "step": 681 + }, + { + "epoch": 1.31, + "learning_rate": 0.0002621314146202656, + "loss": 0.4153, + "step": 682 + }, + { + "epoch": 1.32, + "learning_rate": 0.0002617213661667443, + "loss": 0.4397, + "step": 683 + }, + { + "epoch": 1.32, + "learning_rate": 0.0002613110307567643, + "loss": 0.4052, + "step": 684 + }, + { + "epoch": 1.32, + "learning_rate": 0.00026090041029806695, + "loss": 0.4652, + "step": 685 + }, + { + "epoch": 1.32, + "learning_rate": 0.00026048950669971884, + "loss": 0.3826, + "step": 686 + }, + { + "epoch": 1.32, + "learning_rate": 0.00026007832187210277, + "loss": 0.5639, + "step": 687 + }, + { + "epoch": 1.33, + "learning_rate": 0.00025966685772690906, + "loss": 0.3917, + "step": 688 + }, + { + "epoch": 1.33, + "learning_rate": 0.00025925511617712685, + "loss": 0.5248, + "step": 689 + }, + { + "epoch": 1.33, + "learning_rate": 0.0002588430991370347, + "loss": 0.3796, + "step": 690 + }, + { + "epoch": 1.33, + "learning_rate": 0.0002584308085221922, + "loss": 0.4391, + "step": 691 + }, + { + "epoch": 1.33, + "learning_rate": 0.00025801824624943084, + "loss": 0.4514, + "step": 692 + }, + { + "epoch": 1.34, + "learning_rate": 0.00025760541423684496, + "loss": 0.5046, + "step": 693 + }, + { + "epoch": 1.34, + "learning_rate": 0.0002571923144037831, + "loss": 0.4578, + "step": 694 + }, + { + "epoch": 1.34, + "learning_rate": 0.0002567789486708389, + "loss": 0.4681, + "step": 695 + }, + { + "epoch": 1.34, + "learning_rate": 0.00025636531895984236, + "loss": 0.4501, + "step": 696 + }, + { + "epoch": 1.34, + "learning_rate": 0.0002559514271938506, + "loss": 0.4411, + "step": 697 + }, + { + "epoch": 1.34, + "learning_rate": 0.00025553727529713916, + "loss": 0.401, + "step": 698 + }, + { + "epoch": 1.35, + "learning_rate": 0.00025512286519519293, + "loss": 0.4911, + "step": 699 + }, + { + "epoch": 1.35, + "learning_rate": 0.0002547081988146974, + "loss": 0.3754, + "step": 700 + }, + { + "epoch": 1.35, + "learning_rate": 0.00025429327808352946, + "loss": 0.3807, + "step": 701 + }, + { + "epoch": 1.35, + "learning_rate": 0.0002538781049307486, + "loss": 0.4193, + "step": 702 + }, + { + "epoch": 1.35, + "learning_rate": 0.0002534626812865876, + "loss": 0.5259, + "step": 703 + }, + { + "epoch": 1.36, + "learning_rate": 0.00025304700908244433, + "loss": 0.3684, + "step": 704 + }, + { + "epoch": 1.36, + "learning_rate": 0.0002526310902508718, + "loss": 0.5423, + "step": 705 + }, + { + "epoch": 1.36, + "learning_rate": 0.0002522149267255699, + "loss": 0.4288, + "step": 706 + }, + { + "epoch": 1.36, + "learning_rate": 0.000251798520441376, + "loss": 0.5046, + "step": 707 + }, + { + "epoch": 1.36, + "learning_rate": 0.0002513818733342564, + "loss": 0.3777, + "step": 708 + }, + { + "epoch": 1.37, + "learning_rate": 0.00025096498734129667, + "loss": 0.5171, + "step": 709 + }, + { + "epoch": 1.37, + "learning_rate": 0.0002505478644006932, + "loss": 0.3785, + "step": 710 + }, + { + "epoch": 1.37, + "learning_rate": 0.00025013050645174414, + "loss": 0.5413, + "step": 711 + }, + { + "epoch": 1.37, + "learning_rate": 0.00024971291543483994, + "loss": 0.5018, + "step": 712 + }, + { + "epoch": 1.37, + "learning_rate": 0.00024929509329145477, + "loss": 0.5212, + "step": 713 + }, + { + "epoch": 1.38, + "learning_rate": 0.00024887704196413746, + "loss": 0.483, + "step": 714 + }, + { + "epoch": 1.38, + "learning_rate": 0.0002484587633965023, + "loss": 0.3684, + "step": 715 + }, + { + "epoch": 1.38, + "learning_rate": 0.00024804025953322005, + "loss": 0.3782, + "step": 716 + }, + { + "epoch": 1.38, + "learning_rate": 0.00024762153232000877, + "loss": 0.4995, + "step": 717 + }, + { + "epoch": 1.38, + "learning_rate": 0.0002472025837036253, + "loss": 0.4324, + "step": 718 + }, + { + "epoch": 1.39, + "learning_rate": 0.0002467834156318555, + "loss": 0.5203, + "step": 719 + }, + { + "epoch": 1.39, + "learning_rate": 0.0002463640300535057, + "loss": 0.423, + "step": 720 + }, + { + "epoch": 1.39, + "learning_rate": 0.0002459444289183933, + "loss": 0.4537, + "step": 721 + }, + { + "epoch": 1.39, + "learning_rate": 0.00024552461417733817, + "loss": 0.4124, + "step": 722 + }, + { + "epoch": 1.39, + "learning_rate": 0.0002451045877821528, + "loss": 0.4865, + "step": 723 + }, + { + "epoch": 1.39, + "learning_rate": 0.0002446843516856343, + "loss": 0.4845, + "step": 724 + }, + { + "epoch": 1.4, + "learning_rate": 0.00024426390784155425, + "loss": 0.4174, + "step": 725 + }, + { + "epoch": 1.4, + "learning_rate": 0.00024384325820465033, + "loss": 0.4456, + "step": 726 + }, + { + "epoch": 1.4, + "learning_rate": 0.0002434224047306169, + "loss": 0.4429, + "step": 727 + }, + { + "epoch": 1.4, + "learning_rate": 0.0002430013493760961, + "loss": 0.363, + "step": 728 + }, + { + "epoch": 1.4, + "learning_rate": 0.00024258009409866853, + "loss": 0.4769, + "step": 729 + }, + { + "epoch": 1.41, + "learning_rate": 0.00024215864085684442, + "loss": 0.4597, + "step": 730 + }, + { + "epoch": 1.41, + "learning_rate": 0.00024173699161005429, + "loss": 0.366, + "step": 731 + }, + { + "epoch": 1.41, + "learning_rate": 0.00024131514831863995, + "loss": 0.4746, + "step": 732 + }, + { + "epoch": 1.41, + "learning_rate": 0.0002408931129438453, + "loss": 0.5608, + "step": 733 + }, + { + "epoch": 1.41, + "learning_rate": 0.00024047088744780744, + "loss": 0.4292, + "step": 734 + }, + { + "epoch": 1.42, + "learning_rate": 0.00024004847379354726, + "loss": 0.4743, + "step": 735 + }, + { + "epoch": 1.42, + "learning_rate": 0.00023962587394496038, + "loss": 0.3855, + "step": 736 + }, + { + "epoch": 1.42, + "learning_rate": 0.00023920308986680834, + "loss": 0.4573, + "step": 737 + }, + { + "epoch": 1.42, + "learning_rate": 0.00023878012352470892, + "loss": 0.3937, + "step": 738 + }, + { + "epoch": 1.42, + "learning_rate": 0.0002383569768851274, + "loss": 0.4371, + "step": 739 + }, + { + "epoch": 1.43, + "learning_rate": 0.00023793365191536735, + "loss": 0.5432, + "step": 740 + }, + { + "epoch": 1.43, + "learning_rate": 0.00023751015058356135, + "loss": 0.4803, + "step": 741 + }, + { + "epoch": 1.43, + "learning_rate": 0.000237086474858662, + "loss": 0.4281, + "step": 742 + }, + { + "epoch": 1.43, + "learning_rate": 0.00023666262671043263, + "loss": 0.4031, + "step": 743 + }, + { + "epoch": 1.43, + "learning_rate": 0.00023623860810943826, + "loss": 0.4725, + "step": 744 + }, + { + "epoch": 1.44, + "learning_rate": 0.0002358144210270364, + "loss": 0.4644, + "step": 745 + }, + { + "epoch": 1.44, + "learning_rate": 0.00023539006743536774, + "loss": 0.4848, + "step": 746 + }, + { + "epoch": 1.44, + "learning_rate": 0.00023496554930734718, + "loss": 0.4084, + "step": 747 + }, + { + "epoch": 1.44, + "learning_rate": 0.00023454086861665472, + "loss": 0.4322, + "step": 748 + }, + { + "epoch": 1.44, + "learning_rate": 0.00023411602733772595, + "loss": 0.4847, + "step": 749 + }, + { + "epoch": 1.44, + "learning_rate": 0.00023369102744574312, + "loss": 0.4298, + "step": 750 + }, + { + "epoch": 1.45, + "learning_rate": 0.00023326587091662603, + "loss": 0.4268, + "step": 751 + }, + { + "epoch": 1.45, + "learning_rate": 0.00023284055972702254, + "loss": 0.4089, + "step": 752 + }, + { + "epoch": 1.45, + "learning_rate": 0.0002324150958542997, + "loss": 0.4214, + "step": 753 + }, + { + "epoch": 1.45, + "learning_rate": 0.00023198948127653446, + "loss": 0.5576, + "step": 754 + }, + { + "epoch": 1.45, + "learning_rate": 0.00023156371797250418, + "loss": 0.4377, + "step": 755 + }, + { + "epoch": 1.46, + "learning_rate": 0.00023113780792167785, + "loss": 0.4934, + "step": 756 + }, + { + "epoch": 1.46, + "learning_rate": 0.0002307117531042068, + "loss": 0.3698, + "step": 757 + }, + { + "epoch": 1.46, + "learning_rate": 0.00023028555550091536, + "loss": 0.4722, + "step": 758 + }, + { + "epoch": 1.46, + "learning_rate": 0.00022985921709329157, + "loss": 0.3837, + "step": 759 + }, + { + "epoch": 1.46, + "learning_rate": 0.00022943273986347822, + "loss": 0.5132, + "step": 760 + }, + { + "epoch": 1.47, + "learning_rate": 0.0002290061257942635, + "loss": 0.487, + "step": 761 + }, + { + "epoch": 1.47, + "learning_rate": 0.00022857937686907183, + "loss": 0.3857, + "step": 762 + }, + { + "epoch": 1.47, + "learning_rate": 0.00022815249507195445, + "loss": 0.4135, + "step": 763 + }, + { + "epoch": 1.47, + "learning_rate": 0.00022772548238758064, + "loss": 0.4639, + "step": 764 + }, + { + "epoch": 1.47, + "learning_rate": 0.00022729834080122791, + "loss": 0.5297, + "step": 765 + }, + { + "epoch": 1.48, + "learning_rate": 0.00022687107229877324, + "loss": 0.4485, + "step": 766 + }, + { + "epoch": 1.48, + "learning_rate": 0.00022644367886668357, + "loss": 0.467, + "step": 767 + }, + { + "epoch": 1.48, + "learning_rate": 0.00022601616249200675, + "loss": 0.4304, + "step": 768 + }, + { + "epoch": 1.48, + "learning_rate": 0.00022558852516236217, + "loss": 0.5531, + "step": 769 + }, + { + "epoch": 1.48, + "learning_rate": 0.00022516076886593158, + "loss": 0.5021, + "step": 770 + }, + { + "epoch": 1.49, + "learning_rate": 0.00022473289559144988, + "loss": 0.433, + "step": 771 + }, + { + "epoch": 1.49, + "learning_rate": 0.00022430490732819566, + "loss": 0.505, + "step": 772 + }, + { + "epoch": 1.49, + "learning_rate": 0.00022387680606598235, + "loss": 0.4677, + "step": 773 + }, + { + "epoch": 1.49, + "learning_rate": 0.00022344859379514858, + "loss": 0.4421, + "step": 774 + }, + { + "epoch": 1.49, + "learning_rate": 0.00022302027250654905, + "loss": 0.4282, + "step": 775 + }, + { + "epoch": 1.49, + "learning_rate": 0.0002225918441915456, + "loss": 0.366, + "step": 776 + }, + { + "epoch": 1.5, + "learning_rate": 0.00022216331084199724, + "loss": 0.4147, + "step": 777 + }, + { + "epoch": 1.5, + "learning_rate": 0.00022173467445025158, + "loss": 0.586, + "step": 778 + }, + { + "epoch": 1.5, + "learning_rate": 0.00022130593700913522, + "loss": 0.5285, + "step": 779 + }, + { + "epoch": 1.5, + "learning_rate": 0.00022087710051194463, + "loss": 0.4484, + "step": 780 + }, + { + "epoch": 1.5, + "learning_rate": 0.0002204481669524367, + "loss": 0.4063, + "step": 781 + }, + { + "epoch": 1.51, + "learning_rate": 0.0002200191383248197, + "loss": 0.4751, + "step": 782 + }, + { + "epoch": 1.51, + "learning_rate": 0.00021959001662374373, + "loss": 0.3936, + "step": 783 + }, + { + "epoch": 1.51, + "learning_rate": 0.00021916080384429184, + "loss": 0.4433, + "step": 784 + }, + { + "epoch": 1.51, + "learning_rate": 0.0002187315019819703, + "loss": 0.4883, + "step": 785 + }, + { + "epoch": 1.51, + "learning_rate": 0.00021830211303269965, + "loss": 0.4925, + "step": 786 + }, + { + "epoch": 1.52, + "learning_rate": 0.00021787263899280537, + "loss": 0.4597, + "step": 787 + }, + { + "epoch": 1.52, + "learning_rate": 0.00021744308185900848, + "loss": 0.4954, + "step": 788 + }, + { + "epoch": 1.52, + "learning_rate": 0.00021701344362841626, + "loss": 0.4025, + "step": 789 + }, + { + "epoch": 1.52, + "learning_rate": 0.00021658372629851318, + "loss": 0.5734, + "step": 790 + }, + { + "epoch": 1.52, + "learning_rate": 0.00021615393186715128, + "loss": 0.3779, + "step": 791 + }, + { + "epoch": 1.53, + "learning_rate": 0.00021572406233254116, + "loss": 0.4994, + "step": 792 + }, + { + "epoch": 1.53, + "learning_rate": 0.00021529411969324275, + "loss": 0.5359, + "step": 793 + }, + { + "epoch": 1.53, + "learning_rate": 0.00021486410594815554, + "loss": 0.4738, + "step": 794 + }, + { + "epoch": 1.53, + "learning_rate": 0.00021443402309650979, + "loss": 0.4915, + "step": 795 + }, + { + "epoch": 1.53, + "learning_rate": 0.00021400387313785704, + "loss": 0.4991, + "step": 796 + }, + { + "epoch": 1.54, + "learning_rate": 0.00021357365807206087, + "loss": 0.4503, + "step": 797 + }, + { + "epoch": 1.54, + "learning_rate": 0.0002131433798992874, + "loss": 0.4887, + "step": 798 + }, + { + "epoch": 1.54, + "learning_rate": 0.00021271304061999633, + "loss": 0.4279, + "step": 799 + }, + { + "epoch": 1.54, + "learning_rate": 0.00021228264223493139, + "loss": 0.5367, + "step": 800 + }, + { + "epoch": 1.54, + "learning_rate": 0.00021185218674511097, + "loss": 0.3212, + "step": 801 + }, + { + "epoch": 1.54, + "learning_rate": 0.00021142167615181915, + "loss": 0.493, + "step": 802 + }, + { + "epoch": 1.55, + "learning_rate": 0.0002109911124565962, + "loss": 0.3635, + "step": 803 + }, + { + "epoch": 1.55, + "learning_rate": 0.00021056049766122916, + "loss": 0.3494, + "step": 804 + }, + { + "epoch": 1.55, + "learning_rate": 0.00021012983376774254, + "loss": 0.3536, + "step": 805 + }, + { + "epoch": 1.55, + "learning_rate": 0.0002096991227783895, + "loss": 0.4335, + "step": 806 + }, + { + "epoch": 1.55, + "learning_rate": 0.00020926836669564168, + "loss": 0.4673, + "step": 807 + }, + { + "epoch": 1.56, + "learning_rate": 0.00020883756752218075, + "loss": 0.5269, + "step": 808 + }, + { + "epoch": 1.56, + "learning_rate": 0.0002084067272608886, + "loss": 0.4365, + "step": 809 + }, + { + "epoch": 1.56, + "learning_rate": 0.00020797584791483806, + "loss": 0.2998, + "step": 810 + }, + { + "epoch": 1.56, + "learning_rate": 0.00020754493148728375, + "loss": 0.4318, + "step": 811 + }, + { + "epoch": 1.56, + "learning_rate": 0.00020711397998165264, + "loss": 0.3205, + "step": 812 + }, + { + "epoch": 1.57, + "learning_rate": 0.00020668299540153493, + "loss": 0.5026, + "step": 813 + }, + { + "epoch": 1.57, + "learning_rate": 0.00020625197975067438, + "loss": 0.3923, + "step": 814 + }, + { + "epoch": 1.57, + "learning_rate": 0.0002058209350329594, + "loss": 0.3026, + "step": 815 + }, + { + "epoch": 1.57, + "learning_rate": 0.00020538986325241342, + "loss": 0.4729, + "step": 816 + }, + { + "epoch": 1.57, + "learning_rate": 0.00020495876641318567, + "loss": 0.3236, + "step": 817 + }, + { + "epoch": 1.58, + "learning_rate": 0.0002045276465195419, + "loss": 0.4934, + "step": 818 + }, + { + "epoch": 1.58, + "learning_rate": 0.00020409650557585523, + "loss": 0.3791, + "step": 819 + }, + { + "epoch": 1.58, + "learning_rate": 0.00020366534558659635, + "loss": 0.3879, + "step": 820 + }, + { + "epoch": 1.58, + "learning_rate": 0.00020323416855632477, + "loss": 0.3807, + "step": 821 + }, + { + "epoch": 1.58, + "learning_rate": 0.00020280297648967897, + "loss": 0.415, + "step": 822 + }, + { + "epoch": 1.59, + "learning_rate": 0.00020237177139136758, + "loss": 0.3824, + "step": 823 + }, + { + "epoch": 1.59, + "learning_rate": 0.0002019405552661596, + "loss": 0.3854, + "step": 824 + }, + { + "epoch": 1.59, + "learning_rate": 0.00020150933011887543, + "loss": 0.3781, + "step": 825 + }, + { + "epoch": 1.59, + "learning_rate": 0.00020107809795437745, + "loss": 0.3904, + "step": 826 + }, + { + "epoch": 1.59, + "learning_rate": 0.00020064686077756057, + "loss": 0.3213, + "step": 827 + }, + { + "epoch": 1.6, + "learning_rate": 0.00020021562059334302, + "loss": 0.4888, + "step": 828 + }, + { + "epoch": 1.6, + "learning_rate": 0.00019978437940665702, + "loss": 0.3375, + "step": 829 + }, + { + "epoch": 1.6, + "learning_rate": 0.0001993531392224394, + "loss": 0.4325, + "step": 830 + }, + { + "epoch": 1.6, + "learning_rate": 0.00019892190204562257, + "loss": 0.2865, + "step": 831 + }, + { + "epoch": 1.6, + "learning_rate": 0.0001984906698811246, + "loss": 0.3984, + "step": 832 + }, + { + "epoch": 1.6, + "learning_rate": 0.00019805944473384038, + "loss": 0.3011, + "step": 833 + }, + { + "epoch": 1.61, + "learning_rate": 0.00019762822860863247, + "loss": 0.4063, + "step": 834 + }, + { + "epoch": 1.61, + "learning_rate": 0.00019719702351032105, + "loss": 0.4489, + "step": 835 + }, + { + "epoch": 1.61, + "learning_rate": 0.00019676583144367525, + "loss": 0.3991, + "step": 836 + }, + { + "epoch": 1.61, + "learning_rate": 0.00019633465441340367, + "loss": 0.2736, + "step": 837 + }, + { + "epoch": 1.61, + "learning_rate": 0.00019590349442414484, + "loss": 0.3842, + "step": 838 + }, + { + "epoch": 1.62, + "learning_rate": 0.0001954723534804581, + "loss": 0.346, + "step": 839 + }, + { + "epoch": 1.62, + "learning_rate": 0.0001950412335868144, + "loss": 0.3435, + "step": 840 + }, + { + "epoch": 1.62, + "learning_rate": 0.00019461013674758668, + "loss": 0.4139, + "step": 841 + }, + { + "epoch": 1.62, + "learning_rate": 0.00019417906496704064, + "loss": 0.4031, + "step": 842 + }, + { + "epoch": 1.62, + "learning_rate": 0.00019374802024932567, + "loss": 0.352, + "step": 843 + }, + { + "epoch": 1.63, + "learning_rate": 0.00019331700459846517, + "loss": 0.332, + "step": 844 + }, + { + "epoch": 1.63, + "learning_rate": 0.00019288602001834735, + "loss": 0.3822, + "step": 845 + }, + { + "epoch": 1.63, + "learning_rate": 0.00019245506851271632, + "loss": 0.3912, + "step": 846 + }, + { + "epoch": 1.63, + "learning_rate": 0.000192024152085162, + "loss": 0.4028, + "step": 847 + }, + { + "epoch": 1.63, + "learning_rate": 0.00019159327273911145, + "loss": 0.3625, + "step": 848 + }, + { + "epoch": 1.64, + "learning_rate": 0.00019116243247781927, + "loss": 0.392, + "step": 849 + }, + { + "epoch": 1.64, + "learning_rate": 0.00019073163330435842, + "loss": 0.3687, + "step": 850 + }, + { + "epoch": 1.64, + "learning_rate": 0.00019030087722161055, + "loss": 0.3794, + "step": 851 + }, + { + "epoch": 1.64, + "learning_rate": 0.00018987016623225747, + "loss": 0.4095, + "step": 852 + }, + { + "epoch": 1.64, + "learning_rate": 0.00018943950233877094, + "loss": 0.3665, + "step": 853 + }, + { + "epoch": 1.65, + "learning_rate": 0.00018900888754340382, + "loss": 0.3826, + "step": 854 + }, + { + "epoch": 1.65, + "learning_rate": 0.0001885783238481809, + "loss": 0.4903, + "step": 855 + }, + { + "epoch": 1.65, + "learning_rate": 0.00018814781325488905, + "loss": 0.3174, + "step": 856 + }, + { + "epoch": 1.65, + "learning_rate": 0.00018771735776506866, + "loss": 0.3987, + "step": 857 + }, + { + "epoch": 1.65, + "learning_rate": 0.0001872869593800037, + "loss": 0.4488, + "step": 858 + }, + { + "epoch": 1.65, + "learning_rate": 0.00018685662010071261, + "loss": 0.3298, + "step": 859 + }, + { + "epoch": 1.66, + "learning_rate": 0.0001864263419279392, + "loss": 0.3385, + "step": 860 + }, + { + "epoch": 1.66, + "learning_rate": 0.00018599612686214304, + "loss": 0.4442, + "step": 861 + }, + { + "epoch": 1.66, + "learning_rate": 0.00018556597690349023, + "loss": 0.3132, + "step": 862 + }, + { + "epoch": 1.66, + "learning_rate": 0.0001851358940518445, + "loss": 0.5051, + "step": 863 + }, + { + "epoch": 1.66, + "learning_rate": 0.0001847058803067573, + "loss": 0.3919, + "step": 864 + }, + { + "epoch": 1.67, + "learning_rate": 0.00018427593766745884, + "loss": 0.4193, + "step": 865 + }, + { + "epoch": 1.67, + "learning_rate": 0.0001838460681328488, + "loss": 0.3196, + "step": 866 + }, + { + "epoch": 1.67, + "learning_rate": 0.00018341627370148692, + "loss": 0.6047, + "step": 867 + }, + { + "epoch": 1.67, + "learning_rate": 0.00018298655637158376, + "loss": 0.3741, + "step": 868 + }, + { + "epoch": 1.67, + "learning_rate": 0.00018255691814099157, + "loss": 0.4033, + "step": 869 + }, + { + "epoch": 1.68, + "learning_rate": 0.00018212736100719465, + "loss": 0.422, + "step": 870 + }, + { + "epoch": 1.68, + "learning_rate": 0.00018169788696730034, + "loss": 0.3358, + "step": 871 + }, + { + "epoch": 1.68, + "learning_rate": 0.00018126849801802976, + "loss": 0.3295, + "step": 872 + }, + { + "epoch": 1.68, + "learning_rate": 0.00018083919615570823, + "loss": 0.4859, + "step": 873 + }, + { + "epoch": 1.68, + "learning_rate": 0.00018040998337625627, + "loss": 0.2917, + "step": 874 + }, + { + "epoch": 1.69, + "learning_rate": 0.00017998086167518034, + "loss": 0.3505, + "step": 875 + }, + { + "epoch": 1.69, + "learning_rate": 0.00017955183304756332, + "loss": 0.5962, + "step": 876 + }, + { + "epoch": 1.69, + "learning_rate": 0.0001791228994880554, + "loss": 0.3097, + "step": 877 + }, + { + "epoch": 1.69, + "learning_rate": 0.00017869406299086482, + "loss": 0.4547, + "step": 878 + }, + { + "epoch": 1.69, + "learning_rate": 0.0001782653255497485, + "loss": 0.4567, + "step": 879 + }, + { + "epoch": 1.7, + "learning_rate": 0.0001778366891580028, + "loss": 0.3815, + "step": 880 + }, + { + "epoch": 1.7, + "learning_rate": 0.00017740815580845446, + "loss": 0.5061, + "step": 881 + }, + { + "epoch": 1.7, + "learning_rate": 0.00017697972749345094, + "loss": 0.3729, + "step": 882 + }, + { + "epoch": 1.7, + "learning_rate": 0.0001765514062048515, + "loss": 0.3626, + "step": 883 + }, + { + "epoch": 1.7, + "learning_rate": 0.00017612319393401772, + "loss": 0.4256, + "step": 884 + }, + { + "epoch": 1.7, + "learning_rate": 0.00017569509267180433, + "loss": 0.4114, + "step": 885 + }, + { + "epoch": 1.71, + "learning_rate": 0.00017526710440855017, + "loss": 0.3554, + "step": 886 + }, + { + "epoch": 1.71, + "learning_rate": 0.00017483923113406844, + "loss": 0.4741, + "step": 887 + }, + { + "epoch": 1.71, + "learning_rate": 0.00017441147483763785, + "loss": 0.4472, + "step": 888 + }, + { + "epoch": 1.71, + "learning_rate": 0.00017398383750799327, + "loss": 0.4325, + "step": 889 + }, + { + "epoch": 1.71, + "learning_rate": 0.00017355632113331648, + "loss": 0.4184, + "step": 890 + }, + { + "epoch": 1.72, + "learning_rate": 0.00017312892770122678, + "loss": 0.468, + "step": 891 + }, + { + "epoch": 1.72, + "learning_rate": 0.0001727016591987721, + "loss": 0.3996, + "step": 892 + }, + { + "epoch": 1.72, + "learning_rate": 0.00017227451761241938, + "loss": 0.4718, + "step": 893 + }, + { + "epoch": 1.72, + "learning_rate": 0.00017184750492804554, + "loss": 0.4076, + "step": 894 + }, + { + "epoch": 1.72, + "learning_rate": 0.00017142062313092824, + "loss": 0.337, + "step": 895 + }, + { + "epoch": 1.73, + "learning_rate": 0.00017099387420573656, + "loss": 0.3534, + "step": 896 + }, + { + "epoch": 1.73, + "learning_rate": 0.0001705672601365218, + "loss": 0.4757, + "step": 897 + }, + { + "epoch": 1.73, + "learning_rate": 0.00017014078290670848, + "loss": 0.4629, + "step": 898 + }, + { + "epoch": 1.73, + "learning_rate": 0.00016971444449908474, + "loss": 0.4999, + "step": 899 + }, + { + "epoch": 1.73, + "learning_rate": 0.0001692882468957932, + "loss": 0.4404, + "step": 900 + }, + { + "epoch": 1.74, + "learning_rate": 0.0001688621920783222, + "loss": 0.4102, + "step": 901 + }, + { + "epoch": 1.74, + "learning_rate": 0.00016843628202749592, + "loss": 0.4285, + "step": 902 + }, + { + "epoch": 1.74, + "learning_rate": 0.0001680105187234656, + "loss": 0.4326, + "step": 903 + }, + { + "epoch": 1.74, + "learning_rate": 0.0001675849041457003, + "loss": 0.507, + "step": 904 + }, + { + "epoch": 1.74, + "learning_rate": 0.00016715944027297753, + "loss": 0.3976, + "step": 905 + }, + { + "epoch": 1.75, + "learning_rate": 0.00016673412908337401, + "loss": 0.4173, + "step": 906 + }, + { + "epoch": 1.75, + "learning_rate": 0.0001663089725542569, + "loss": 0.4053, + "step": 907 + }, + { + "epoch": 1.75, + "learning_rate": 0.0001658839726622741, + "loss": 0.4606, + "step": 908 + }, + { + "epoch": 1.75, + "learning_rate": 0.00016545913138334535, + "loss": 0.3376, + "step": 909 + }, + { + "epoch": 1.75, + "learning_rate": 0.00016503445069265287, + "loss": 0.4466, + "step": 910 + }, + { + "epoch": 1.75, + "learning_rate": 0.00016460993256463228, + "loss": 0.3034, + "step": 911 + }, + { + "epoch": 1.76, + "learning_rate": 0.00016418557897296366, + "loss": 0.4127, + "step": 912 + }, + { + "epoch": 1.76, + "learning_rate": 0.00016376139189056176, + "loss": 0.4437, + "step": 913 + }, + { + "epoch": 1.76, + "learning_rate": 0.0001633373732895674, + "loss": 0.4479, + "step": 914 + }, + { + "epoch": 1.76, + "learning_rate": 0.00016291352514133807, + "loss": 0.4006, + "step": 915 + }, + { + "epoch": 1.76, + "learning_rate": 0.00016248984941643873, + "loss": 0.4428, + "step": 916 + }, + { + "epoch": 1.77, + "learning_rate": 0.00016206634808463267, + "loss": 0.3704, + "step": 917 + }, + { + "epoch": 1.77, + "learning_rate": 0.00016164302311487262, + "loss": 0.3807, + "step": 918 + }, + { + "epoch": 1.77, + "learning_rate": 0.00016121987647529115, + "loss": 0.4194, + "step": 919 + }, + { + "epoch": 1.77, + "learning_rate": 0.00016079691013319168, + "loss": 0.4486, + "step": 920 + }, + { + "epoch": 1.77, + "learning_rate": 0.00016037412605503966, + "loss": 0.3642, + "step": 921 + }, + { + "epoch": 1.78, + "learning_rate": 0.00015995152620645287, + "loss": 0.4344, + "step": 922 + }, + { + "epoch": 1.78, + "learning_rate": 0.00015952911255219258, + "loss": 0.3868, + "step": 923 + }, + { + "epoch": 1.78, + "learning_rate": 0.00015910688705615472, + "loss": 0.4161, + "step": 924 + }, + { + "epoch": 1.78, + "learning_rate": 0.00015868485168136015, + "loss": 0.4094, + "step": 925 + }, + { + "epoch": 1.78, + "learning_rate": 0.00015826300838994573, + "loss": 0.4528, + "step": 926 + }, + { + "epoch": 1.79, + "learning_rate": 0.00015784135914315563, + "loss": 0.3656, + "step": 927 + }, + { + "epoch": 1.79, + "learning_rate": 0.00015741990590133152, + "loss": 0.4982, + "step": 928 + }, + { + "epoch": 1.79, + "learning_rate": 0.00015699865062390392, + "loss": 0.4157, + "step": 929 + }, + { + "epoch": 1.79, + "learning_rate": 0.00015657759526938313, + "loss": 0.3153, + "step": 930 + }, + { + "epoch": 1.79, + "learning_rate": 0.00015615674179534977, + "loss": 0.4343, + "step": 931 + }, + { + "epoch": 1.8, + "learning_rate": 0.00015573609215844582, + "loss": 0.4922, + "step": 932 + }, + { + "epoch": 1.8, + "learning_rate": 0.0001553156483143658, + "loss": 0.5049, + "step": 933 + }, + { + "epoch": 1.8, + "learning_rate": 0.00015489541221784726, + "loss": 0.4128, + "step": 934 + }, + { + "epoch": 1.8, + "learning_rate": 0.00015447538582266193, + "loss": 0.3353, + "step": 935 + }, + { + "epoch": 1.8, + "learning_rate": 0.00015405557108160673, + "loss": 0.4553, + "step": 936 + }, + { + "epoch": 1.8, + "learning_rate": 0.00015363596994649433, + "loss": 0.406, + "step": 937 + }, + { + "epoch": 1.81, + "learning_rate": 0.00015321658436814455, + "loss": 0.4339, + "step": 938 + }, + { + "epoch": 1.81, + "learning_rate": 0.00015279741629637477, + "loss": 0.4105, + "step": 939 + }, + { + "epoch": 1.81, + "learning_rate": 0.00015237846767999122, + "loss": 0.4491, + "step": 940 + }, + { + "epoch": 1.81, + "learning_rate": 0.00015195974046678003, + "loss": 0.3728, + "step": 941 + }, + { + "epoch": 1.81, + "learning_rate": 0.00015154123660349774, + "loss": 0.3357, + "step": 942 + }, + { + "epoch": 1.82, + "learning_rate": 0.00015112295803586256, + "loss": 0.3035, + "step": 943 + }, + { + "epoch": 1.82, + "learning_rate": 0.00015070490670854528, + "loss": 0.347, + "step": 944 + }, + { + "epoch": 1.82, + "learning_rate": 0.00015028708456516017, + "loss": 0.4472, + "step": 945 + }, + { + "epoch": 1.82, + "learning_rate": 0.00014986949354825588, + "loss": 0.3888, + "step": 946 + }, + { + "epoch": 1.82, + "learning_rate": 0.00014945213559930677, + "loss": 0.4407, + "step": 947 + }, + { + "epoch": 1.83, + "learning_rate": 0.00014903501265870338, + "loss": 0.5161, + "step": 948 + }, + { + "epoch": 1.83, + "learning_rate": 0.00014861812666574364, + "loss": 0.4002, + "step": 949 + }, + { + "epoch": 1.83, + "learning_rate": 0.000148201479558624, + "loss": 0.3079, + "step": 950 + } + ], + "logging_steps": 1, + "max_steps": 1557, + "num_train_epochs": 3, + "save_steps": 50, + "total_flos": 1.2732085224403108e+18, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-950/training_args.bin b/checkpoint-950/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..c4843df7aa1383a371fb28dea27d303b1a1145e1 --- /dev/null +++ b/checkpoint-950/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:637e0437b5818f76ea2fea2aa5b87010fc39a85bdfc12277d436c72e69d11811 +size 4155 diff --git a/runs/Oct04_17-40-29_1d91a4065171/events.out.tfevents.1696441230.1d91a4065171.307.0 b/runs/Oct04_17-40-29_1d91a4065171/events.out.tfevents.1696441230.1d91a4065171.307.0 new file mode 100644 index 0000000000000000000000000000000000000000..009fb5a6e6f3e4a487fb5803df7cc12e57e4b1bd --- /dev/null +++ b/runs/Oct04_17-40-29_1d91a4065171/events.out.tfevents.1696441230.1d91a4065171.307.0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ae46c2aa78109fe31ca441a309d500e731611b459d605805509aaa11f3af18e7 +size 48378 diff --git a/runs/Oct04_19-14-59_1d91a4065171/events.out.tfevents.1696446900.1d91a4065171.25416.0 b/runs/Oct04_19-14-59_1d91a4065171/events.out.tfevents.1696446900.1d91a4065171.25416.0 new file mode 100644 index 0000000000000000000000000000000000000000..ada1e49420f883cf37fa3b592880d492da392f5c --- /dev/null +++ b/runs/Oct04_19-14-59_1d91a4065171/events.out.tfevents.1696446900.1d91a4065171.25416.0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:84457bdb71c62cf805181e935aaf204d848ac64419f49ddc9efacfbe4ced43ca +size 28439 diff --git a/training_args.bin b/training_args.bin index c62257ab265a8cae4034c6b32ccac107cefb7904..c4843df7aa1383a371fb28dea27d303b1a1145e1 100644 --- a/training_args.bin +++ b/training_args.bin @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:6eabffe952601acc55592b8f8ba3066e48ba407a023d340dfa5fe5150891586f +oid sha256:637e0437b5818f76ea2fea2aa5b87010fc39a85bdfc12277d436c72e69d11811 size 4155