diff --git a/llama2_13b_peft/linguistics_puzzles/README.md b/llama2_13b_peft/linguistics_puzzles/README.md new file mode 100644 index 0000000000000000000000000000000000000000..e4229a8f14f3eede0b3158a1d42fafaa20d27975 --- /dev/null +++ b/llama2_13b_peft/linguistics_puzzles/README.md @@ -0,0 +1,74 @@ +--- +license: other +library_name: peft +tags: +- llama-factory +- lora +- generated_from_trainer +base_model: /data1/model/llama2/meta-llama/Llama2-13b +model-index: +- name: linguistics_puzzles_no_sys + results: [] +--- + + + +# linguistics_puzzles_no_sys + +This model is a fine-tuned version of [/data1/model/llama2/meta-llama/Llama2-13b](https://huggingface.co//data1/model/llama2/meta-llama/Llama2-13b) on the linguistics_puzzles_no_sys dataset. +It achieves the following results on the evaluation set: +- Loss: 0.5924 + +## Model description + +More information needed + +## Intended uses & limitations + +More information needed + +## Training and evaluation data + +More information needed + +## Training procedure + +### Training hyperparameters + +The following hyperparameters were used during training: +- learning_rate: 5e-05 +- train_batch_size: 4 +- eval_batch_size: 4 +- seed: 42 +- distributed_type: multi-GPU +- num_devices: 2 +- total_train_batch_size: 8 +- total_eval_batch_size: 8 +- optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08 +- lr_scheduler_type: cosine +- lr_scheduler_warmup_steps: 20 +- num_epochs: 5.0 + +### Training results + +| Training Loss | Epoch | Step | Validation Loss | +|:-------------:|:------:|:----:|:---------------:| +| 1.1276 | 0.5263 | 100 | 1.0876 | +| 0.8128 | 1.0526 | 200 | 0.8153 | +| 0.6705 | 1.5789 | 300 | 0.6892 | +| 0.4876 | 2.1053 | 400 | 0.6225 | +| 0.4435 | 2.6316 | 500 | 0.5924 | +| 0.2743 | 3.1579 | 600 | 0.6151 | +| 0.2846 | 3.6842 | 700 | 0.6084 | +| 0.2069 | 4.2105 | 800 | 0.6427 | +| 0.172 | 4.7368 | 900 | 0.6495 | + + +### Framework versions + +- PEFT 0.10.0 +- Transformers 4.40.0 +- Pytorch 2.2.1 +- Datasets 2.18.0 +- Tokenizers 0.19.1 \ No newline at end of file diff --git a/llama2_13b_peft/linguistics_puzzles/adapter_config.json b/llama2_13b_peft/linguistics_puzzles/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..801d71a4f86adde83f5360df8b9fd6798a9bbe77 --- /dev/null +++ b/llama2_13b_peft/linguistics_puzzles/adapter_config.json @@ -0,0 +1,34 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "/data1/model/llama2/meta-llama/Llama2-13b", + "bias": "none", + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 16, + "lora_dropout": 0.0, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "r": 8, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "o_proj", + "k_proj", + "q_proj", + "up_proj", + "gate_proj", + "v_proj", + "down_proj" + ], + "task_type": "CAUSAL_LM", + "use_dora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/llama2_13b_peft/linguistics_puzzles/adapter_model.safetensors b/llama2_13b_peft/linguistics_puzzles/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..0805ca0f3c698ab99568958c6666457c03173fe1 --- /dev/null +++ b/llama2_13b_peft/linguistics_puzzles/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ef6dff2a000b0e4ef6a4db9d169cbaa257c1423e45fae8ef4f428ba9852e00f5 +size 125248064 diff --git a/llama2_13b_peft/linguistics_puzzles/all_results.json b/llama2_13b_peft/linguistics_puzzles/all_results.json new file mode 100644 index 0000000000000000000000000000000000000000..3622b5658b3efa01bbac08c815371d32e7529ee3 --- /dev/null +++ b/llama2_13b_peft/linguistics_puzzles/all_results.json @@ -0,0 +1,12 @@ +{ + "epoch": 5.0, + "eval_loss": 0.5924356579780579, + "eval_runtime": 1.9025, + "eval_samples_per_second": 42.05, + "eval_steps_per_second": 5.256, + "total_flos": 2.0275085174217114e+17, + "train_loss": 0.5822552880487945, + "train_runtime": 660.0352, + "train_samples_per_second": 11.515, + "train_steps_per_second": 1.439 +} \ No newline at end of file diff --git a/llama2_13b_peft/linguistics_puzzles/eval_results.json b/llama2_13b_peft/linguistics_puzzles/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..0b1113b01be64175547033f42dea6bd88c528961 --- /dev/null +++ b/llama2_13b_peft/linguistics_puzzles/eval_results.json @@ -0,0 +1,7 @@ +{ + "epoch": 5.0, + "eval_loss": 0.5924356579780579, + "eval_runtime": 1.9025, + "eval_samples_per_second": 42.05, + "eval_steps_per_second": 5.256 +} \ No newline at end of file diff --git a/llama2_13b_peft/linguistics_puzzles/special_tokens_map.json b/llama2_13b_peft/linguistics_puzzles/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..72ecfeeb7e14d244c936169d2ed139eeae235ef1 --- /dev/null +++ b/llama2_13b_peft/linguistics_puzzles/special_tokens_map.json @@ -0,0 +1,24 @@ +{ + "bos_token": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": "", + "unk_token": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + } +} diff --git a/llama2_13b_peft/linguistics_puzzles/tokenizer.model b/llama2_13b_peft/linguistics_puzzles/tokenizer.model new file mode 100644 index 0000000000000000000000000000000000000000..6c00c742ce03c627d6cd5b795984876fa49fa899 --- /dev/null +++ b/llama2_13b_peft/linguistics_puzzles/tokenizer.model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9e556afd44213b6bd1be2b850ebbbd98f5481437a8021afaf58ee7fb1818d347 +size 499723 diff --git a/llama2_13b_peft/linguistics_puzzles/tokenizer_config.json b/llama2_13b_peft/linguistics_puzzles/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..a40266f39e5b5fed14de34710d35eb9e98d6bdad --- /dev/null +++ b/llama2_13b_peft/linguistics_puzzles/tokenizer_config.json @@ -0,0 +1,45 @@ +{ + "add_bos_token": true, + "add_eos_token": false, + "add_prefix_space": true, + "added_tokens_decoder": { + "0": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "1": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "2": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + } + }, + "bos_token": "", + "chat_template": "{% if messages[0]['role'] == 'system' %}{% set system_message = messages[0]['content'] %}{% endif %}{% if system_message is defined %}{{ system_message }}{% endif %}{% for message in messages %}{% set content = message['content'] %}{% if message['role'] == 'user' %}{{ content }}{% elif message['role'] == 'assistant' %}{{ content + '\\n' }}{% endif %}{% endfor %}", + "clean_up_tokenization_spaces": false, + "eos_token": "", + "legacy": true, + "model_max_length": 1000000000000000019884624838656, + "pad_token": "", + "padding_side": "right", + "sp_model_kwargs": {}, + "spaces_between_special_tokens": false, + "split_special_tokens": false, + "tokenizer_class": "LlamaTokenizer", + "unk_token": "", + "use_default_system_prompt": false +} diff --git a/llama2_13b_peft/linguistics_puzzles/train_results.json b/llama2_13b_peft/linguistics_puzzles/train_results.json new file mode 100644 index 0000000000000000000000000000000000000000..1a7f209917b8fb4dcd40550c1d8743c75464bafb --- /dev/null +++ b/llama2_13b_peft/linguistics_puzzles/train_results.json @@ -0,0 +1,8 @@ +{ + "epoch": 5.0, + "total_flos": 2.0275085174217114e+17, + "train_loss": 0.5822552880487945, + "train_runtime": 660.0352, + "train_samples_per_second": 11.515, + "train_steps_per_second": 1.439 +} \ No newline at end of file diff --git a/llama2_13b_peft/linguistics_puzzles/trainer_log.jsonl b/llama2_13b_peft/linguistics_puzzles/trainer_log.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..acf3bc53f4746cba805944b915da98baa17f8f2a --- /dev/null +++ b/llama2_13b_peft/linguistics_puzzles/trainer_log.jsonl @@ -0,0 +1,106 @@ +{"current_steps": 10, "total_steps": 950, "loss": 2.5922, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 2.5e-05, "epoch": 0.05263157894736842, "percentage": 1.05, "elapsed_time": "0:00:07", "remaining_time": "0:12:31"} +{"current_steps": 20, "total_steps": 950, "loss": 2.3206, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 5e-05, "epoch": 0.10526315789473684, "percentage": 2.11, "elapsed_time": "0:00:14", "remaining_time": "0:11:00"} +{"current_steps": 30, "total_steps": 950, "loss": 1.7229, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.998573727324295e-05, "epoch": 0.15789473684210525, "percentage": 3.16, "elapsed_time": "0:00:20", "remaining_time": "0:10:32"} +{"current_steps": 40, "total_steps": 950, "loss": 1.3729, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.994296536700177e-05, "epoch": 0.21052631578947367, "percentage": 4.21, "elapsed_time": "0:00:26", "remaining_time": "0:10:14"} +{"current_steps": 50, "total_steps": 950, "loss": 1.3635, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.987173308479738e-05, "epoch": 0.2631578947368421, "percentage": 5.26, "elapsed_time": "0:00:33", "remaining_time": "0:10:10"} +{"current_steps": 60, "total_steps": 950, "loss": 1.3315, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.977212170395598e-05, "epoch": 0.3157894736842105, "percentage": 6.32, "elapsed_time": "0:00:40", "remaining_time": "0:10:00"} +{"current_steps": 70, "total_steps": 950, "loss": 1.2515, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.964424488287009e-05, "epoch": 0.3684210526315789, "percentage": 7.37, "elapsed_time": "0:00:46", "remaining_time": "0:09:49"} +{"current_steps": 80, "total_steps": 950, "loss": 1.1872, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.948824853131236e-05, "epoch": 0.42105263157894735, "percentage": 8.42, "elapsed_time": "0:00:53", "remaining_time": "0:09:43"} +{"current_steps": 90, "total_steps": 950, "loss": 1.1552, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.930431064394977e-05, "epoch": 0.47368421052631576, "percentage": 9.47, "elapsed_time": "0:01:00", "remaining_time": "0:09:34"} +{"current_steps": 100, "total_steps": 950, "loss": 1.1276, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.909264109724853e-05, "epoch": 0.5263157894736842, "percentage": 10.53, "elapsed_time": "0:01:06", "remaining_time": "0:09:27"} +{"current_steps": 100, "total_steps": 950, "loss": null, "eval_loss": 1.0876480340957642, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": null, "epoch": 0.5263157894736842, "percentage": 10.53, "elapsed_time": "0:01:06", "remaining_time": "0:09:27"} +{"current_steps": 110, "total_steps": 950, "loss": 1.1756, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.885348141000122e-05, "epoch": 0.5789473684210527, "percentage": 11.58, "elapsed_time": "0:01:15", "remaining_time": "0:09:37"} +{"current_steps": 120, "total_steps": 950, "loss": 1.1106, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.858710446774951e-05, "epoch": 0.631578947368421, "percentage": 12.63, "elapsed_time": "0:01:22", "remaining_time": "0:09:29"} +{"current_steps": 130, "total_steps": 950, "loss": 1.0175, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.829381421141671e-05, "epoch": 0.6842105263157895, "percentage": 13.68, "elapsed_time": "0:01:28", "remaining_time": "0:09:19"} +{"current_steps": 140, "total_steps": 950, "loss": 0.9733, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.7973945290505766e-05, "epoch": 0.7368421052631579, "percentage": 14.74, "elapsed_time": "0:01:35", "remaining_time": "0:09:10"} +{"current_steps": 150, "total_steps": 950, "loss": 0.9907, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.7627862681258037e-05, "epoch": 0.7894736842105263, "percentage": 15.79, "elapsed_time": "0:01:41", "remaining_time": "0:09:03"} +{"current_steps": 160, "total_steps": 950, "loss": 0.9312, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.725596127020879e-05, "epoch": 0.8421052631578947, "percentage": 16.84, "elapsed_time": "0:01:48", "remaining_time": "0:08:54"} +{"current_steps": 170, "total_steps": 950, "loss": 0.9586, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.685866540361456e-05, "epoch": 0.8947368421052632, "percentage": 17.89, "elapsed_time": "0:01:55", "remaining_time": "0:08:47"} +{"current_steps": 180, "total_steps": 950, "loss": 0.9595, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.643642840326627e-05, "epoch": 0.9473684210526315, "percentage": 18.95, "elapsed_time": "0:02:01", "remaining_time": "0:08:39"} +{"current_steps": 190, "total_steps": 950, "loss": 0.8331, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.598973204924097e-05, "epoch": 1.0, "percentage": 20.0, "elapsed_time": "0:02:08", "remaining_time": "0:08:32"} +{"current_steps": 200, "total_steps": 950, "loss": 0.8128, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.551908603018191e-05, "epoch": 1.0526315789473684, "percentage": 21.05, "elapsed_time": "0:02:14", "remaining_time": "0:08:25"} +{"current_steps": 200, "total_steps": 950, "loss": null, "eval_loss": 0.8153461217880249, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": null, "epoch": 1.0526315789473684, "percentage": 21.05, "elapsed_time": "0:02:14", "remaining_time": "0:08:25"} +{"current_steps": 210, "total_steps": 950, "loss": 0.8186, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.502502736173462e-05, "epoch": 1.1052631578947367, "percentage": 22.11, "elapsed_time": "0:02:23", "remaining_time": "0:08:26"} +{"current_steps": 220, "total_steps": 950, "loss": 0.6895, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.45081197738023e-05, "epoch": 1.1578947368421053, "percentage": 23.16, "elapsed_time": "0:02:30", "remaining_time": "0:08:18"} +{"current_steps": 230, "total_steps": 950, "loss": 0.7901, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.3968953067319777e-05, "epoch": 1.2105263157894737, "percentage": 24.21, "elapsed_time": "0:02:36", "remaining_time": "0:08:10"} +{"current_steps": 240, "total_steps": 950, "loss": 0.704, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.340814244127993e-05, "epoch": 1.263157894736842, "percentage": 25.26, "elapsed_time": "0:02:43", "remaining_time": "0:08:03"} +{"current_steps": 250, "total_steps": 950, "loss": 0.6879, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.282632779078051e-05, "epoch": 1.3157894736842106, "percentage": 26.32, "elapsed_time": "0:02:49", "remaining_time": "0:07:55"} +{"current_steps": 260, "total_steps": 950, "loss": 0.7563, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.222417297689217e-05, "epoch": 1.368421052631579, "percentage": 27.37, "elapsed_time": "0:02:56", "remaining_time": "0:07:48"} +{"current_steps": 270, "total_steps": 950, "loss": 0.6846, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.160236506918098e-05, "epoch": 1.4210526315789473, "percentage": 28.42, "elapsed_time": "0:03:03", "remaining_time": "0:07:41"} +{"current_steps": 280, "total_steps": 950, "loss": 0.7155, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.096161356174959e-05, "epoch": 1.4736842105263157, "percentage": 29.47, "elapsed_time": "0:03:09", "remaining_time": "0:07:34"} +{"current_steps": 290, "total_steps": 950, "loss": 0.8037, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.030264956369157e-05, "epoch": 1.526315789473684, "percentage": 30.53, "elapsed_time": "0:03:16", "remaining_time": "0:07:26"} +{"current_steps": 300, "total_steps": 950, "loss": 0.6705, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 3.962622496488269e-05, "epoch": 1.5789473684210527, "percentage": 31.58, "elapsed_time": "0:03:22", "remaining_time": "0:07:19"} +{"current_steps": 300, "total_steps": 950, "loss": null, "eval_loss": 0.6891714930534363, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": null, "epoch": 1.5789473684210527, "percentage": 31.58, "elapsed_time": "0:03:22", "remaining_time": "0:07:19"} +{"current_steps": 310, "total_steps": 950, "loss": 0.6389, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 3.893311157806091e-05, "epoch": 1.631578947368421, "percentage": 32.63, "elapsed_time": "0:03:31", "remaining_time": "0:07:17"} +{"current_steps": 320, "total_steps": 950, "loss": 0.7223, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 3.822410025817406e-05, "epoch": 1.6842105263157894, "percentage": 33.68, "elapsed_time": "0:03:38", "remaining_time": "0:07:10"} +{"current_steps": 330, "total_steps": 950, "loss": 0.6948, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 3.7500000000000003e-05, "epoch": 1.736842105263158, "percentage": 34.74, "elapsed_time": "0:03:45", "remaining_time": "0:07:03"} +{"current_steps": 340, "total_steps": 950, "loss": 0.6658, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 3.67616370150689e-05, "epoch": 1.7894736842105263, "percentage": 35.79, "elapsed_time": "0:03:51", "remaining_time": "0:06:55"} +{"current_steps": 350, "total_steps": 950, "loss": 0.643, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 3.600985378894086e-05, "epoch": 1.8421052631578947, "percentage": 36.84, "elapsed_time": "0:03:58", "remaining_time": "0:06:48"} +{"current_steps": 360, "total_steps": 950, "loss": 0.6537, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 3.5245508119914687e-05, "epoch": 1.8947368421052633, "percentage": 37.89, "elapsed_time": "0:04:05", "remaining_time": "0:06:41"} +{"current_steps": 370, "total_steps": 950, "loss": 0.641, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 3.44694721402644e-05, "epoch": 1.9473684210526314, "percentage": 38.95, "elapsed_time": "0:04:12", "remaining_time": "0:06:35"} +{"current_steps": 380, "total_steps": 950, "loss": 0.6708, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 3.3682631321120504e-05, "epoch": 2.0, "percentage": 40.0, "elapsed_time": "0:04:18", "remaining_time": "0:06:27"} +{"current_steps": 390, "total_steps": 950, "loss": 0.5061, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 3.2885883462131394e-05, "epoch": 2.0526315789473686, "percentage": 41.05, "elapsed_time": "0:04:24", "remaining_time": "0:06:20"} +{"current_steps": 400, "total_steps": 950, "loss": 0.4876, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 3.2080137667057595e-05, "epoch": 2.1052631578947367, "percentage": 42.11, "elapsed_time": "0:04:31", "remaining_time": "0:06:13"} +{"current_steps": 400, "total_steps": 950, "loss": null, "eval_loss": 0.6224929690361023, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": null, "epoch": 2.1052631578947367, "percentage": 42.11, "elapsed_time": "0:04:31", "remaining_time": "0:06:13"} +{"current_steps": 410, "total_steps": 950, "loss": 0.485, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 3.126631330646802e-05, "epoch": 2.1578947368421053, "percentage": 43.16, "elapsed_time": "0:04:40", "remaining_time": "0:06:09"} +{"current_steps": 420, "total_steps": 950, "loss": 0.536, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 3.0445338968721287e-05, "epoch": 2.2105263157894735, "percentage": 44.21, "elapsed_time": "0:04:47", "remaining_time": "0:06:02"} +{"current_steps": 430, "total_steps": 950, "loss": 0.4493, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 2.961815140042974e-05, "epoch": 2.263157894736842, "percentage": 45.26, "elapsed_time": "0:04:53", "remaining_time": "0:05:55"} +{"current_steps": 440, "total_steps": 950, "loss": 0.4806, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 2.878569443761442e-05, "epoch": 2.3157894736842106, "percentage": 46.32, "elapsed_time": "0:05:00", "remaining_time": "0:05:48"} +{"current_steps": 450, "total_steps": 950, "loss": 0.4642, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 2.7948917928771158e-05, "epoch": 2.3684210526315788, "percentage": 47.37, "elapsed_time": "0:05:07", "remaining_time": "0:05:41"} +{"current_steps": 460, "total_steps": 950, "loss": 0.4857, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 2.7108776651076118e-05, "epoch": 2.4210526315789473, "percentage": 48.42, "elapsed_time": "0:05:13", "remaining_time": "0:05:34"} +{"current_steps": 470, "total_steps": 950, "loss": 0.4604, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 2.6266229220967818e-05, "epoch": 2.473684210526316, "percentage": 49.47, "elapsed_time": "0:05:20", "remaining_time": "0:05:27"} +{"current_steps": 480, "total_steps": 950, "loss": 0.4294, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 2.5422237000348276e-05, "epoch": 2.526315789473684, "percentage": 50.53, "elapsed_time": "0:05:27", "remaining_time": "0:05:20"} +{"current_steps": 490, "total_steps": 950, "loss": 0.436, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 2.4577762999651726e-05, "epoch": 2.5789473684210527, "percentage": 51.58, "elapsed_time": "0:05:34", "remaining_time": "0:05:13"} +{"current_steps": 500, "total_steps": 950, "loss": 0.4435, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 2.3733770779032184e-05, "epoch": 2.6315789473684212, "percentage": 52.63, "elapsed_time": "0:05:40", "remaining_time": "0:05:06"} +{"current_steps": 500, "total_steps": 950, "loss": null, "eval_loss": 0.5924356579780579, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": null, "epoch": 2.6315789473684212, "percentage": 52.63, "elapsed_time": "0:05:40", "remaining_time": "0:05:06"} +{"current_steps": 510, "total_steps": 950, "loss": 0.4128, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 2.2891223348923884e-05, "epoch": 2.6842105263157894, "percentage": 53.68, "elapsed_time": "0:05:49", "remaining_time": "0:05:01"} +{"current_steps": 520, "total_steps": 950, "loss": 0.4201, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 2.2051082071228854e-05, "epoch": 2.736842105263158, "percentage": 54.74, "elapsed_time": "0:05:55", "remaining_time": "0:04:54"} +{"current_steps": 530, "total_steps": 950, "loss": 0.4144, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 2.1214305562385592e-05, "epoch": 2.7894736842105265, "percentage": 55.79, "elapsed_time": "0:06:02", "remaining_time": "0:04:47"} +{"current_steps": 540, "total_steps": 950, "loss": 0.4325, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 2.0381848599570276e-05, "epoch": 2.8421052631578947, "percentage": 56.84, "elapsed_time": "0:06:09", "remaining_time": "0:04:40"} +{"current_steps": 550, "total_steps": 950, "loss": 0.4539, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 1.9554661031278712e-05, "epoch": 2.8947368421052633, "percentage": 57.89, "elapsed_time": "0:06:15", "remaining_time": "0:04:33"} +{"current_steps": 560, "total_steps": 950, "loss": 0.3898, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 1.8733686693531985e-05, "epoch": 2.9473684210526314, "percentage": 58.95, "elapsed_time": "0:06:22", "remaining_time": "0:04:26"} +{"current_steps": 570, "total_steps": 950, "loss": 0.4347, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 1.79198623329424e-05, "epoch": 3.0, "percentage": 60.0, "elapsed_time": "0:06:28", "remaining_time": "0:04:19"} +{"current_steps": 580, "total_steps": 950, "loss": 0.2771, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 1.711411653786861e-05, "epoch": 3.0526315789473686, "percentage": 61.05, "elapsed_time": "0:06:35", "remaining_time": "0:04:12"} +{"current_steps": 590, "total_steps": 950, "loss": 0.2786, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 1.6317368678879495e-05, "epoch": 3.1052631578947367, "percentage": 62.11, "elapsed_time": "0:06:41", "remaining_time": "0:04:05"} +{"current_steps": 600, "total_steps": 950, "loss": 0.2743, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 1.55305278597356e-05, "epoch": 3.1578947368421053, "percentage": 63.16, "elapsed_time": "0:06:48", "remaining_time": "0:03:58"} +{"current_steps": 600, "total_steps": 950, "loss": null, "eval_loss": 0.6151354908943176, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": null, "epoch": 3.1578947368421053, "percentage": 63.16, "elapsed_time": "0:06:48", "remaining_time": "0:03:58"} +{"current_steps": 610, "total_steps": 950, "loss": 0.2611, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 1.475449188008532e-05, "epoch": 3.2105263157894735, "percentage": 64.21, "elapsed_time": "0:06:57", "remaining_time": "0:03:52"} +{"current_steps": 620, "total_steps": 950, "loss": 0.237, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 1.399014621105914e-05, "epoch": 3.263157894736842, "percentage": 65.26, "elapsed_time": "0:07:03", "remaining_time": "0:03:45"} +{"current_steps": 630, "total_steps": 950, "loss": 0.2319, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 1.3238362984931113e-05, "epoch": 3.3157894736842106, "percentage": 66.32, "elapsed_time": "0:07:10", "remaining_time": "0:03:38"} +{"current_steps": 640, "total_steps": 950, "loss": 0.2785, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 1.2500000000000006e-05, "epoch": 3.3684210526315788, "percentage": 67.37, "elapsed_time": "0:07:17", "remaining_time": "0:03:31"} +{"current_steps": 650, "total_steps": 950, "loss": 0.3323, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 1.1775899741825947e-05, "epoch": 3.4210526315789473, "percentage": 68.42, "elapsed_time": "0:07:23", "remaining_time": "0:03:24"} +{"current_steps": 660, "total_steps": 950, "loss": 0.2762, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 1.1066888421939093e-05, "epoch": 3.473684210526316, "percentage": 69.47, "elapsed_time": "0:07:30", "remaining_time": "0:03:17"} +{"current_steps": 670, "total_steps": 950, "loss": 0.2982, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 1.0373775035117305e-05, "epoch": 3.526315789473684, "percentage": 70.53, "elapsed_time": "0:07:37", "remaining_time": "0:03:11"} +{"current_steps": 680, "total_steps": 950, "loss": 0.2338, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 9.697350436308427e-06, "epoch": 3.5789473684210527, "percentage": 71.58, "elapsed_time": "0:07:43", "remaining_time": "0:03:04"} +{"current_steps": 690, "total_steps": 950, "loss": 0.2962, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 9.038386438250415e-06, "epoch": 3.6315789473684212, "percentage": 72.63, "elapsed_time": "0:07:50", "remaining_time": "0:02:57"} +{"current_steps": 700, "total_steps": 950, "loss": 0.2846, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 8.397634930819021e-06, "epoch": 3.6842105263157894, "percentage": 73.68, "elapsed_time": "0:07:57", "remaining_time": "0:02:50"} +{"current_steps": 700, "total_steps": 950, "loss": null, "eval_loss": 0.6083844900131226, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": null, "epoch": 3.6842105263157894, "percentage": 73.68, "elapsed_time": "0:07:57", "remaining_time": "0:02:50"} +{"current_steps": 710, "total_steps": 950, "loss": 0.2895, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 7.775827023107835e-06, "epoch": 3.736842105263158, "percentage": 74.74, "elapsed_time": "0:08:06", "remaining_time": "0:02:44"} +{"current_steps": 720, "total_steps": 950, "loss": 0.3261, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 7.173672209219495e-06, "epoch": 3.7894736842105265, "percentage": 75.79, "elapsed_time": "0:08:13", "remaining_time": "0:02:37"} +{"current_steps": 730, "total_steps": 950, "loss": 0.2358, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 6.591857558720071e-06, "epoch": 3.8421052631578947, "percentage": 76.84, "elapsed_time": "0:08:20", "remaining_time": "0:02:30"} +{"current_steps": 740, "total_steps": 950, "loss": 0.2723, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 6.031046932680229e-06, "epoch": 3.8947368421052633, "percentage": 77.89, "elapsed_time": "0:08:26", "remaining_time": "0:02:23"} +{"current_steps": 750, "total_steps": 950, "loss": 0.2941, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 5.491880226197707e-06, "epoch": 3.9473684210526314, "percentage": 78.95, "elapsed_time": "0:08:33", "remaining_time": "0:02:16"} +{"current_steps": 760, "total_steps": 950, "loss": 0.2721, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.9749726382653905e-06, "epoch": 4.0, "percentage": 80.0, "elapsed_time": "0:08:39", "remaining_time": "0:02:09"} +{"current_steps": 770, "total_steps": 950, "loss": 0.1677, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.480913969818098e-06, "epoch": 4.052631578947368, "percentage": 81.05, "elapsed_time": "0:08:46", "remaining_time": "0:02:03"} +{"current_steps": 780, "total_steps": 950, "loss": 0.2291, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.010267950759025e-06, "epoch": 4.105263157894737, "percentage": 82.11, "elapsed_time": "0:08:53", "remaining_time": "0:01:56"} +{"current_steps": 790, "total_steps": 950, "loss": 0.1991, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 3.5635715967337223e-06, "epoch": 4.157894736842105, "percentage": 83.16, "elapsed_time": "0:09:00", "remaining_time": "0:01:49"} +{"current_steps": 800, "total_steps": 950, "loss": 0.2069, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 3.141334596385448e-06, "epoch": 4.2105263157894735, "percentage": 84.21, "elapsed_time": "0:09:06", "remaining_time": "0:01:42"} +{"current_steps": 800, "total_steps": 950, "loss": null, "eval_loss": 0.6427180767059326, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": null, "epoch": 4.2105263157894735, "percentage": 84.21, "elapsed_time": "0:09:06", "remaining_time": "0:01:42"} +{"current_steps": 810, "total_steps": 950, "loss": 0.2213, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 2.7440387297912123e-06, "epoch": 4.2631578947368425, "percentage": 85.26, "elapsed_time": "0:09:15", "remaining_time": "0:01:36"} +{"current_steps": 820, "total_steps": 950, "loss": 0.2008, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 2.372137318741968e-06, "epoch": 4.315789473684211, "percentage": 86.32, "elapsed_time": "0:09:22", "remaining_time": "0:01:29"} +{"current_steps": 830, "total_steps": 950, "loss": 0.2178, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 2.026054709494235e-06, "epoch": 4.368421052631579, "percentage": 87.37, "elapsed_time": "0:09:28", "remaining_time": "0:01:22"} +{"current_steps": 840, "total_steps": 950, "loss": 0.1878, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 1.7061857885832893e-06, "epoch": 4.421052631578947, "percentage": 88.42, "elapsed_time": "0:09:35", "remaining_time": "0:01:15"} +{"current_steps": 850, "total_steps": 950, "loss": 0.1733, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 1.4128955322504966e-06, "epoch": 4.473684210526316, "percentage": 89.47, "elapsed_time": "0:09:41", "remaining_time": "0:01:08"} +{"current_steps": 860, "total_steps": 950, "loss": 0.193, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 1.1465185899987797e-06, "epoch": 4.526315789473684, "percentage": 90.53, "elapsed_time": "0:09:48", "remaining_time": "0:01:01"} +{"current_steps": 870, "total_steps": 950, "loss": 0.1802, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 9.073589027514789e-07, "epoch": 4.578947368421053, "percentage": 91.58, "elapsed_time": "0:09:55", "remaining_time": "0:00:54"} +{"current_steps": 880, "total_steps": 950, "loss": 0.1736, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 6.956893560502359e-07, "epoch": 4.631578947368421, "percentage": 92.63, "elapsed_time": "0:10:01", "remaining_time": "0:00:47"} +{"current_steps": 890, "total_steps": 950, "loss": 0.1761, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 5.117514686876379e-07, "epoch": 4.684210526315789, "percentage": 93.68, "elapsed_time": "0:10:08", "remaining_time": "0:00:41"} +{"current_steps": 900, "total_steps": 950, "loss": 0.172, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 3.557551171299051e-07, "epoch": 4.7368421052631575, "percentage": 94.74, "elapsed_time": "0:10:14", "remaining_time": "0:00:34"} +{"current_steps": 900, "total_steps": 950, "loss": null, "eval_loss": 0.6494551301002502, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": null, "epoch": 4.7368421052631575, "percentage": 94.74, "elapsed_time": "0:10:14", "remaining_time": "0:00:34"} +{"current_steps": 910, "total_steps": 950, "loss": 0.1734, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 2.27878296044029e-07, "epoch": 4.7894736842105265, "percentage": 95.79, "elapsed_time": "0:10:23", "remaining_time": "0:00:27"} +{"current_steps": 920, "total_steps": 950, "loss": 0.1954, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 1.2826691520262114e-07, "epoch": 4.842105263157895, "percentage": 96.84, "elapsed_time": "0:10:30", "remaining_time": "0:00:20"} +{"current_steps": 930, "total_steps": 950, "loss": 0.1744, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 5.7034632998231865e-08, "epoch": 4.894736842105263, "percentage": 97.89, "elapsed_time": "0:10:37", "remaining_time": "0:00:13"} +{"current_steps": 940, "total_steps": 950, "loss": 0.1778, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 1.4262726757049982e-08, "epoch": 4.947368421052632, "percentage": 98.95, "elapsed_time": "0:10:43", "remaining_time": "0:00:06"} +{"current_steps": 950, "total_steps": 950, "loss": 0.1836, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 0.0, "epoch": 5.0, "percentage": 100.0, "elapsed_time": "0:10:50", "remaining_time": "0:00:00"} +{"current_steps": 950, "total_steps": 950, "loss": null, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": null, "epoch": 5.0, "percentage": 100.0, "elapsed_time": "0:10:50", "remaining_time": "0:00:00"} +{"current_steps": 10, "total_steps": 10, "loss": null, "eval_loss": 0.5924356579780579, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": null, "epoch": 5.0, "percentage": 100.0, "elapsed_time": "0:11:00", "remaining_time": "0:00:00"} diff --git a/llama2_13b_peft/linguistics_puzzles/trainer_state.json b/llama2_13b_peft/linguistics_puzzles/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..8d2aba5a478caae31e9b9f8f309e413aebb9459f --- /dev/null +++ b/llama2_13b_peft/linguistics_puzzles/trainer_state.json @@ -0,0 +1,767 @@ +{ + "best_metric": 0.5924356579780579, + "best_model_checkpoint": "ckpt/llama2_13b_other/linguistics_puzzles_no_sys/checkpoint-500", + "epoch": 5.0, + "eval_steps": 100, + "global_step": 950, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.05263157894736842, + "grad_norm": 1.5050264596939087, + "learning_rate": 2.5e-05, + "loss": 2.5922, + "step": 10 + }, + { + "epoch": 0.10526315789473684, + "grad_norm": 1.5525988340377808, + "learning_rate": 5e-05, + "loss": 2.3206, + "step": 20 + }, + { + "epoch": 0.15789473684210525, + "grad_norm": 1.7404705286026, + "learning_rate": 4.998573727324295e-05, + "loss": 1.7229, + "step": 30 + }, + { + "epoch": 0.21052631578947367, + "grad_norm": 1.8962088823318481, + "learning_rate": 4.994296536700177e-05, + "loss": 1.3729, + "step": 40 + }, + { + "epoch": 0.2631578947368421, + "grad_norm": 1.776729941368103, + "learning_rate": 4.987173308479738e-05, + "loss": 1.3635, + "step": 50 + }, + { + "epoch": 0.3157894736842105, + "grad_norm": 11.020795822143555, + "learning_rate": 4.977212170395598e-05, + "loss": 1.3315, + "step": 60 + }, + { + "epoch": 0.3684210526315789, + "grad_norm": 2.192176580429077, + "learning_rate": 4.964424488287009e-05, + "loss": 1.2515, + "step": 70 + }, + { + "epoch": 0.42105263157894735, + "grad_norm": 2.4063496589660645, + "learning_rate": 4.948824853131236e-05, + "loss": 1.1872, + "step": 80 + }, + { + "epoch": 0.47368421052631576, + "grad_norm": 2.7862613201141357, + "learning_rate": 4.930431064394977e-05, + "loss": 1.1552, + "step": 90 + }, + { + "epoch": 0.5263157894736842, + "grad_norm": 3.5330026149749756, + "learning_rate": 4.909264109724853e-05, + "loss": 1.1276, + "step": 100 + }, + { + "epoch": 0.5263157894736842, + "eval_loss": 1.0876480340957642, + "eval_runtime": 1.9022, + "eval_samples_per_second": 42.057, + "eval_steps_per_second": 5.257, + "step": 100 + }, + { + "epoch": 0.5789473684210527, + "grad_norm": 2.4774415493011475, + "learning_rate": 4.885348141000122e-05, + "loss": 1.1756, + "step": 110 + }, + { + "epoch": 0.631578947368421, + "grad_norm": 2.380500555038452, + "learning_rate": 4.858710446774951e-05, + "loss": 1.1106, + "step": 120 + }, + { + "epoch": 0.6842105263157895, + "grad_norm": 3.0656540393829346, + "learning_rate": 4.829381421141671e-05, + "loss": 1.0175, + "step": 130 + }, + { + "epoch": 0.7368421052631579, + "grad_norm": 5.256251811981201, + "learning_rate": 4.7973945290505766e-05, + "loss": 0.9733, + "step": 140 + }, + { + "epoch": 0.7894736842105263, + "grad_norm": 2.674135446548462, + "learning_rate": 4.7627862681258037e-05, + "loss": 0.9907, + "step": 150 + }, + { + "epoch": 0.8421052631578947, + "grad_norm": 3.5206069946289062, + "learning_rate": 4.725596127020879e-05, + "loss": 0.9312, + "step": 160 + }, + { + "epoch": 0.8947368421052632, + "grad_norm": 3.4086978435516357, + "learning_rate": 4.685866540361456e-05, + "loss": 0.9586, + "step": 170 + }, + { + "epoch": 0.9473684210526315, + "grad_norm": 4.591642379760742, + "learning_rate": 4.643642840326627e-05, + "loss": 0.9595, + "step": 180 + }, + { + "epoch": 1.0, + "grad_norm": 2.8823249340057373, + "learning_rate": 4.598973204924097e-05, + "loss": 0.8331, + "step": 190 + }, + { + "epoch": 1.0526315789473684, + "grad_norm": 3.7064428329467773, + "learning_rate": 4.551908603018191e-05, + "loss": 0.8128, + "step": 200 + }, + { + "epoch": 1.0526315789473684, + "eval_loss": 0.8153461217880249, + "eval_runtime": 1.9192, + "eval_samples_per_second": 41.684, + "eval_steps_per_second": 5.21, + "step": 200 + }, + { + "epoch": 1.1052631578947367, + "grad_norm": 4.2386274337768555, + "learning_rate": 4.502502736173462e-05, + "loss": 0.8186, + "step": 210 + }, + { + "epoch": 1.1578947368421053, + "grad_norm": 3.1767256259918213, + "learning_rate": 4.45081197738023e-05, + "loss": 0.6895, + "step": 220 + }, + { + "epoch": 1.2105263157894737, + "grad_norm": 3.748518466949463, + "learning_rate": 4.3968953067319777e-05, + "loss": 0.7901, + "step": 230 + }, + { + "epoch": 1.263157894736842, + "grad_norm": 3.807053565979004, + "learning_rate": 4.340814244127993e-05, + "loss": 0.704, + "step": 240 + }, + { + "epoch": 1.3157894736842106, + "grad_norm": 5.013542175292969, + "learning_rate": 4.282632779078051e-05, + "loss": 0.6879, + "step": 250 + }, + { + "epoch": 1.368421052631579, + "grad_norm": 4.752715110778809, + "learning_rate": 4.222417297689217e-05, + "loss": 0.7563, + "step": 260 + }, + { + "epoch": 1.4210526315789473, + "grad_norm": 3.6476950645446777, + "learning_rate": 4.160236506918098e-05, + "loss": 0.6846, + "step": 270 + }, + { + "epoch": 1.4736842105263157, + "grad_norm": 3.8758108615875244, + "learning_rate": 4.096161356174959e-05, + "loss": 0.7155, + "step": 280 + }, + { + "epoch": 1.526315789473684, + "grad_norm": 4.166601657867432, + "learning_rate": 4.030264956369157e-05, + "loss": 0.8037, + "step": 290 + }, + { + "epoch": 1.5789473684210527, + "grad_norm": 4.603171348571777, + "learning_rate": 3.962622496488269e-05, + "loss": 0.6705, + "step": 300 + }, + { + "epoch": 1.5789473684210527, + "eval_loss": 0.6891714930534363, + "eval_runtime": 1.9174, + "eval_samples_per_second": 41.724, + "eval_steps_per_second": 5.216, + "step": 300 + }, + { + "epoch": 1.631578947368421, + "grad_norm": 3.820142984390259, + "learning_rate": 3.893311157806091e-05, + "loss": 0.6389, + "step": 310 + }, + { + "epoch": 1.6842105263157894, + "grad_norm": 5.900814533233643, + "learning_rate": 3.822410025817406e-05, + "loss": 0.7223, + "step": 320 + }, + { + "epoch": 1.736842105263158, + "grad_norm": 4.315140724182129, + "learning_rate": 3.7500000000000003e-05, + "loss": 0.6948, + "step": 330 + }, + { + "epoch": 1.7894736842105263, + "grad_norm": 4.747324466705322, + "learning_rate": 3.67616370150689e-05, + "loss": 0.6658, + "step": 340 + }, + { + "epoch": 1.8421052631578947, + "grad_norm": 3.504014492034912, + "learning_rate": 3.600985378894086e-05, + "loss": 0.643, + "step": 350 + }, + { + "epoch": 1.8947368421052633, + "grad_norm": 5.181077480316162, + "learning_rate": 3.5245508119914687e-05, + "loss": 0.6537, + "step": 360 + }, + { + "epoch": 1.9473684210526314, + "grad_norm": 5.073149681091309, + "learning_rate": 3.44694721402644e-05, + "loss": 0.641, + "step": 370 + }, + { + "epoch": 2.0, + "grad_norm": 5.070895671844482, + "learning_rate": 3.3682631321120504e-05, + "loss": 0.6708, + "step": 380 + }, + { + "epoch": 2.0526315789473686, + "grad_norm": 5.305852890014648, + "learning_rate": 3.2885883462131394e-05, + "loss": 0.5061, + "step": 390 + }, + { + "epoch": 2.1052631578947367, + "grad_norm": 6.452213287353516, + "learning_rate": 3.2080137667057595e-05, + "loss": 0.4876, + "step": 400 + }, + { + "epoch": 2.1052631578947367, + "eval_loss": 0.6224929690361023, + "eval_runtime": 1.9167, + "eval_samples_per_second": 41.739, + "eval_steps_per_second": 5.217, + "step": 400 + }, + { + "epoch": 2.1578947368421053, + "grad_norm": 3.6080775260925293, + "learning_rate": 3.126631330646802e-05, + "loss": 0.485, + "step": 410 + }, + { + "epoch": 2.2105263157894735, + "grad_norm": 2.2630574703216553, + "learning_rate": 3.0445338968721287e-05, + "loss": 0.536, + "step": 420 + }, + { + "epoch": 2.263157894736842, + "grad_norm": 4.616273880004883, + "learning_rate": 2.961815140042974e-05, + "loss": 0.4493, + "step": 430 + }, + { + "epoch": 2.3157894736842106, + "grad_norm": 4.5297956466674805, + "learning_rate": 2.878569443761442e-05, + "loss": 0.4806, + "step": 440 + }, + { + "epoch": 2.3684210526315788, + "grad_norm": 4.910376071929932, + "learning_rate": 2.7948917928771158e-05, + "loss": 0.4642, + "step": 450 + }, + { + "epoch": 2.4210526315789473, + "grad_norm": 4.3276801109313965, + "learning_rate": 2.7108776651076118e-05, + "loss": 0.4857, + "step": 460 + }, + { + "epoch": 2.473684210526316, + "grad_norm": 3.657116413116455, + "learning_rate": 2.6266229220967818e-05, + "loss": 0.4604, + "step": 470 + }, + { + "epoch": 2.526315789473684, + "grad_norm": 4.7539896965026855, + "learning_rate": 2.5422237000348276e-05, + "loss": 0.4294, + "step": 480 + }, + { + "epoch": 2.5789473684210527, + "grad_norm": 4.227921962738037, + "learning_rate": 2.4577762999651726e-05, + "loss": 0.436, + "step": 490 + }, + { + "epoch": 2.6315789473684212, + "grad_norm": 6.821872234344482, + "learning_rate": 2.3733770779032184e-05, + "loss": 0.4435, + "step": 500 + }, + { + "epoch": 2.6315789473684212, + "eval_loss": 0.5924356579780579, + "eval_runtime": 1.9193, + "eval_samples_per_second": 41.683, + "eval_steps_per_second": 5.21, + "step": 500 + }, + { + "epoch": 2.6842105263157894, + "grad_norm": 4.023755073547363, + "learning_rate": 2.2891223348923884e-05, + "loss": 0.4128, + "step": 510 + }, + { + "epoch": 2.736842105263158, + "grad_norm": 4.245009899139404, + "learning_rate": 2.2051082071228854e-05, + "loss": 0.4201, + "step": 520 + }, + { + "epoch": 2.7894736842105265, + "grad_norm": 7.485212326049805, + "learning_rate": 2.1214305562385592e-05, + "loss": 0.4144, + "step": 530 + }, + { + "epoch": 2.8421052631578947, + "grad_norm": 3.890044689178467, + "learning_rate": 2.0381848599570276e-05, + "loss": 0.4325, + "step": 540 + }, + { + "epoch": 2.8947368421052633, + "grad_norm": 5.785126686096191, + "learning_rate": 1.9554661031278712e-05, + "loss": 0.4539, + "step": 550 + }, + { + "epoch": 2.9473684210526314, + "grad_norm": 3.959681272506714, + "learning_rate": 1.8733686693531985e-05, + "loss": 0.3898, + "step": 560 + }, + { + "epoch": 3.0, + "grad_norm": 6.1470160484313965, + "learning_rate": 1.79198623329424e-05, + "loss": 0.4347, + "step": 570 + }, + { + "epoch": 3.0526315789473686, + "grad_norm": 6.080893039703369, + "learning_rate": 1.711411653786861e-05, + "loss": 0.2771, + "step": 580 + }, + { + "epoch": 3.1052631578947367, + "grad_norm": 3.995936155319214, + "learning_rate": 1.6317368678879495e-05, + "loss": 0.2786, + "step": 590 + }, + { + "epoch": 3.1578947368421053, + "grad_norm": 4.9943084716796875, + "learning_rate": 1.55305278597356e-05, + "loss": 0.2743, + "step": 600 + }, + { + "epoch": 3.1578947368421053, + "eval_loss": 0.6151354908943176, + "eval_runtime": 1.9185, + "eval_samples_per_second": 41.7, + "eval_steps_per_second": 5.212, + "step": 600 + }, + { + "epoch": 3.2105263157894735, + "grad_norm": 3.650193452835083, + "learning_rate": 1.475449188008532e-05, + "loss": 0.2611, + "step": 610 + }, + { + "epoch": 3.263157894736842, + "grad_norm": 3.5425643920898438, + "learning_rate": 1.399014621105914e-05, + "loss": 0.237, + "step": 620 + }, + { + "epoch": 3.3157894736842106, + "grad_norm": 4.187167644500732, + "learning_rate": 1.3238362984931113e-05, + "loss": 0.2319, + "step": 630 + }, + { + "epoch": 3.3684210526315788, + "grad_norm": 3.7174108028411865, + "learning_rate": 1.2500000000000006e-05, + "loss": 0.2785, + "step": 640 + }, + { + "epoch": 3.4210526315789473, + "grad_norm": 4.665218353271484, + "learning_rate": 1.1775899741825947e-05, + "loss": 0.3323, + "step": 650 + }, + { + "epoch": 3.473684210526316, + "grad_norm": 6.711315631866455, + "learning_rate": 1.1066888421939093e-05, + "loss": 0.2762, + "step": 660 + }, + { + "epoch": 3.526315789473684, + "grad_norm": 4.101406097412109, + "learning_rate": 1.0373775035117305e-05, + "loss": 0.2982, + "step": 670 + }, + { + "epoch": 3.5789473684210527, + "grad_norm": 3.3571157455444336, + "learning_rate": 9.697350436308427e-06, + "loss": 0.2338, + "step": 680 + }, + { + "epoch": 3.6315789473684212, + "grad_norm": 7.152629852294922, + "learning_rate": 9.038386438250415e-06, + "loss": 0.2962, + "step": 690 + }, + { + "epoch": 3.6842105263157894, + "grad_norm": 5.147871971130371, + "learning_rate": 8.397634930819021e-06, + "loss": 0.2846, + "step": 700 + }, + { + "epoch": 3.6842105263157894, + "eval_loss": 0.6083844900131226, + "eval_runtime": 1.9199, + "eval_samples_per_second": 41.67, + "eval_steps_per_second": 5.209, + "step": 700 + }, + { + "epoch": 3.736842105263158, + "grad_norm": 3.984264373779297, + "learning_rate": 7.775827023107835e-06, + "loss": 0.2895, + "step": 710 + }, + { + "epoch": 3.7894736842105265, + "grad_norm": 6.230710983276367, + "learning_rate": 7.173672209219495e-06, + "loss": 0.3261, + "step": 720 + }, + { + "epoch": 3.8421052631578947, + "grad_norm": 3.685063362121582, + "learning_rate": 6.591857558720071e-06, + "loss": 0.2358, + "step": 730 + }, + { + "epoch": 3.8947368421052633, + "grad_norm": 4.337435245513916, + "learning_rate": 6.031046932680229e-06, + "loss": 0.2723, + "step": 740 + }, + { + "epoch": 3.9473684210526314, + "grad_norm": 4.504445552825928, + "learning_rate": 5.491880226197707e-06, + "loss": 0.2941, + "step": 750 + }, + { + "epoch": 4.0, + "grad_norm": 4.7959442138671875, + "learning_rate": 4.9749726382653905e-06, + "loss": 0.2721, + "step": 760 + }, + { + "epoch": 4.052631578947368, + "grad_norm": 2.663322925567627, + "learning_rate": 4.480913969818098e-06, + "loss": 0.1677, + "step": 770 + }, + { + "epoch": 4.105263157894737, + "grad_norm": 5.704188346862793, + "learning_rate": 4.010267950759025e-06, + "loss": 0.2291, + "step": 780 + }, + { + "epoch": 4.157894736842105, + "grad_norm": 4.857370853424072, + "learning_rate": 3.5635715967337223e-06, + "loss": 0.1991, + "step": 790 + }, + { + "epoch": 4.2105263157894735, + "grad_norm": 2.6290528774261475, + "learning_rate": 3.141334596385448e-06, + "loss": 0.2069, + "step": 800 + }, + { + "epoch": 4.2105263157894735, + "eval_loss": 0.6427180767059326, + "eval_runtime": 1.9195, + "eval_samples_per_second": 41.677, + "eval_steps_per_second": 5.21, + "step": 800 + }, + { + "epoch": 4.2631578947368425, + "grad_norm": 6.7939558029174805, + "learning_rate": 2.7440387297912123e-06, + "loss": 0.2213, + "step": 810 + }, + { + "epoch": 4.315789473684211, + "grad_norm": 5.425328731536865, + "learning_rate": 2.372137318741968e-06, + "loss": 0.2008, + "step": 820 + }, + { + "epoch": 4.368421052631579, + "grad_norm": 3.0159809589385986, + "learning_rate": 2.026054709494235e-06, + "loss": 0.2178, + "step": 830 + }, + { + "epoch": 4.421052631578947, + "grad_norm": 4.54276704788208, + "learning_rate": 1.7061857885832893e-06, + "loss": 0.1878, + "step": 840 + }, + { + "epoch": 4.473684210526316, + "grad_norm": 4.1157755851745605, + "learning_rate": 1.4128955322504966e-06, + "loss": 0.1733, + "step": 850 + }, + { + "epoch": 4.526315789473684, + "grad_norm": 4.860106945037842, + "learning_rate": 1.1465185899987797e-06, + "loss": 0.193, + "step": 860 + }, + { + "epoch": 4.578947368421053, + "grad_norm": 4.945047378540039, + "learning_rate": 9.073589027514789e-07, + "loss": 0.1802, + "step": 870 + }, + { + "epoch": 4.631578947368421, + "grad_norm": 2.316741943359375, + "learning_rate": 6.956893560502359e-07, + "loss": 0.1736, + "step": 880 + }, + { + "epoch": 4.684210526315789, + "grad_norm": 4.012813091278076, + "learning_rate": 5.117514686876379e-07, + "loss": 0.1761, + "step": 890 + }, + { + "epoch": 4.7368421052631575, + "grad_norm": 5.301681995391846, + "learning_rate": 3.557551171299051e-07, + "loss": 0.172, + "step": 900 + }, + { + "epoch": 4.7368421052631575, + "eval_loss": 0.6494551301002502, + "eval_runtime": 1.9201, + "eval_samples_per_second": 41.665, + "eval_steps_per_second": 5.208, + "step": 900 + }, + { + "epoch": 4.7894736842105265, + "grad_norm": 3.559140205383301, + "learning_rate": 2.27878296044029e-07, + "loss": 0.1734, + "step": 910 + }, + { + "epoch": 4.842105263157895, + "grad_norm": 7.743849277496338, + "learning_rate": 1.2826691520262114e-07, + "loss": 0.1954, + "step": 920 + }, + { + "epoch": 4.894736842105263, + "grad_norm": 3.5408854484558105, + "learning_rate": 5.7034632998231865e-08, + "loss": 0.1744, + "step": 930 + }, + { + "epoch": 4.947368421052632, + "grad_norm": 2.413121461868286, + "learning_rate": 1.4262726757049982e-08, + "loss": 0.1778, + "step": 940 + }, + { + "epoch": 5.0, + "grad_norm": 2.56962513923645, + "learning_rate": 0.0, + "loss": 0.1836, + "step": 950 + }, + { + "epoch": 5.0, + "step": 950, + "total_flos": 2.0275085174217114e+17, + "train_loss": 0.5822552880487945, + "train_runtime": 660.0352, + "train_samples_per_second": 11.515, + "train_steps_per_second": 1.439 + } + ], + "logging_steps": 10, + "max_steps": 950, + "num_input_tokens_seen": 0, + "num_train_epochs": 5, + "save_steps": 100, + "total_flos": 2.0275085174217114e+17, + "train_batch_size": 4, + "trial_name": null, + "trial_params": null +} diff --git a/llama2_13b_peft/linguistics_puzzles/training_args.bin b/llama2_13b_peft/linguistics_puzzles/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..998f813cd6613d304b43fa85e7995b297053a484 --- /dev/null +++ b/llama2_13b_peft/linguistics_puzzles/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:67ac58d8b967dcc701c74de72e5e18349db160299022d297808b6aa2f75860a0 +size 5176 diff --git a/llama2_13b_peft/linguistics_puzzles/training_eval_loss.png b/llama2_13b_peft/linguistics_puzzles/training_eval_loss.png new file mode 100644 index 0000000000000000000000000000000000000000..f9e938a29eab3735e50f5ced837e7027bc478ee8 Binary files /dev/null and b/llama2_13b_peft/linguistics_puzzles/training_eval_loss.png differ diff --git a/llama2_13b_peft/linguistics_puzzles/training_loss.png b/llama2_13b_peft/linguistics_puzzles/training_loss.png new file mode 100644 index 0000000000000000000000000000000000000000..56ee27bf2bef7198c03838f4403ee262b08e15ad Binary files /dev/null and b/llama2_13b_peft/linguistics_puzzles/training_loss.png differ diff --git a/llama2_13b_peft/news_commentary_de/README.md b/llama2_13b_peft/news_commentary_de/README.md new file mode 100644 index 0000000000000000000000000000000000000000..fd3b86616b694273bc9e706a5e8ed3747ef141f3 --- /dev/null +++ b/llama2_13b_peft/news_commentary_de/README.md @@ -0,0 +1,85 @@ +--- +license: other +library_name: peft +tags: +- llama-factory +- lora +- generated_from_trainer +base_model: /data1/model/llama2/meta-llama/Llama2-13b +model-index: +- name: news_commentary_de_no_sys + results: [] +--- + + + +# news_commentary_de_no_sys + +This model is a fine-tuned version of [/data1/model/llama2/meta-llama/Llama2-13b](https://huggingface.co//data1/model/llama2/meta-llama/Llama2-13b) on the news_commentary_de_no_sys dataset. +It achieves the following results on the evaluation set: +- Loss: 0.6944 + +## Model description + +More information needed + +## Intended uses & limitations + +More information needed + +## Training and evaluation data + +More information needed + +## Training procedure + +### Training hyperparameters + +The following hyperparameters were used during training: +- learning_rate: 1e-05 +- train_batch_size: 8 +- eval_batch_size: 8 +- seed: 42 +- distributed_type: multi-GPU +- num_devices: 2 +- total_train_batch_size: 16 +- total_eval_batch_size: 16 +- optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08 +- lr_scheduler_type: cosine +- lr_scheduler_warmup_steps: 20 +- num_epochs: 10.0 + +### Training results + +| Training Loss | Epoch | Step | Validation Loss | +|:-------------:|:-----:|:----:|:---------------:| +| 0.7429 | 0.13 | 200 | 0.7712 | +| 0.7549 | 0.25 | 400 | 0.7434 | +| 0.7552 | 0.38 | 600 | 0.7330 | +| 0.7265 | 0.5 | 800 | 0.7256 | +| 0.7524 | 0.63 | 1000 | 0.7200 | +| 0.6976 | 0.75 | 1200 | 0.7151 | +| 0.7408 | 0.88 | 1400 | 0.7116 | +| 0.701 | 1.0 | 1600 | 0.7085 | +| 0.7084 | 1.13 | 1800 | 0.7059 | +| 0.6999 | 1.25 | 2000 | 0.7040 | +| 0.7182 | 1.38 | 2200 | 0.7022 | +| 0.7267 | 1.51 | 2400 | 0.6994 | +| 0.6912 | 1.63 | 2600 | 0.6972 | +| 0.6821 | 1.76 | 2800 | 0.6954 | +| 0.7104 | 1.88 | 3000 | 0.6944 | +| 0.6222 | 2.01 | 3200 | 0.6934 | +| 0.6383 | 2.13 | 3400 | 0.6974 | +| 0.6436 | 2.26 | 3600 | 0.6981 | +| 0.6444 | 2.38 | 3800 | 0.6968 | +| 0.6368 | 2.51 | 4000 | 0.6987 | + + +### Framework versions + +- PEFT 0.9.0 +- Transformers 4.38.2 +- Pytorch 2.2.1 +- Datasets 2.18.0 +- Tokenizers 0.15.2 \ No newline at end of file diff --git a/llama2_13b_peft/news_commentary_de/adapter_config.json b/llama2_13b_peft/news_commentary_de/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..9b194b947adda4e9dcd02e2d860237742eda5a32 --- /dev/null +++ b/llama2_13b_peft/news_commentary_de/adapter_config.json @@ -0,0 +1,33 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "/data1/model/llama2/meta-llama/Llama2-13b", + "bias": "none", + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 16, + "lora_dropout": 0.0, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "r": 8, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "up_proj", + "down_proj", + "v_proj", + "k_proj", + "o_proj", + "gate_proj", + "q_proj" + ], + "task_type": "CAUSAL_LM", + "use_dora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/llama2_13b_peft/news_commentary_de/adapter_model.safetensors b/llama2_13b_peft/news_commentary_de/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..c87e568a8d15429a2724b153c7cfc1503989dd8c --- /dev/null +++ b/llama2_13b_peft/news_commentary_de/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:44a2422055e9542643288a7443b823001443ae5a402e2cff85e691f7121a6398 +size 125248064 diff --git a/llama2_13b_peft/news_commentary_de/all_results.json b/llama2_13b_peft/news_commentary_de/all_results.json new file mode 100644 index 0000000000000000000000000000000000000000..7f983a27ebbf7d87dd34440f5b3ab768de93a3a2 --- /dev/null +++ b/llama2_13b_peft/news_commentary_de/all_results.json @@ -0,0 +1,11 @@ +{ + "epoch": 2.51, + "eval_loss": 0.6943792104721069, + "eval_runtime": 64.8294, + "eval_samples_per_second": 69.413, + "eval_steps_per_second": 4.35, + "train_loss": 0.7081527805328369, + "train_runtime": 4312.5386, + "train_samples_per_second": 59.13, + "train_steps_per_second": 3.696 +} \ No newline at end of file diff --git a/llama2_13b_peft/news_commentary_de/eval_results.json b/llama2_13b_peft/news_commentary_de/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..be510258d76594c25b96d4a80a69af1b0819d82d --- /dev/null +++ b/llama2_13b_peft/news_commentary_de/eval_results.json @@ -0,0 +1,7 @@ +{ + "epoch": 2.51, + "eval_loss": 0.6943792104721069, + "eval_runtime": 64.8294, + "eval_samples_per_second": 69.413, + "eval_steps_per_second": 4.35 +} \ No newline at end of file diff --git a/llama2_13b_peft/news_commentary_de/special_tokens_map.json b/llama2_13b_peft/news_commentary_de/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..72ecfeeb7e14d244c936169d2ed139eeae235ef1 --- /dev/null +++ b/llama2_13b_peft/news_commentary_de/special_tokens_map.json @@ -0,0 +1,24 @@ +{ + "bos_token": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": "", + "unk_token": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + } +} diff --git a/llama2_13b_peft/news_commentary_de/tokenizer.model b/llama2_13b_peft/news_commentary_de/tokenizer.model new file mode 100644 index 0000000000000000000000000000000000000000..6c00c742ce03c627d6cd5b795984876fa49fa899 --- /dev/null +++ b/llama2_13b_peft/news_commentary_de/tokenizer.model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9e556afd44213b6bd1be2b850ebbbd98f5481437a8021afaf58ee7fb1818d347 +size 499723 diff --git a/llama2_13b_peft/news_commentary_de/tokenizer_config.json b/llama2_13b_peft/news_commentary_de/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..a40266f39e5b5fed14de34710d35eb9e98d6bdad --- /dev/null +++ b/llama2_13b_peft/news_commentary_de/tokenizer_config.json @@ -0,0 +1,45 @@ +{ + "add_bos_token": true, + "add_eos_token": false, + "add_prefix_space": true, + "added_tokens_decoder": { + "0": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "1": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "2": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + } + }, + "bos_token": "", + "chat_template": "{% if messages[0]['role'] == 'system' %}{% set system_message = messages[0]['content'] %}{% endif %}{% if system_message is defined %}{{ system_message }}{% endif %}{% for message in messages %}{% set content = message['content'] %}{% if message['role'] == 'user' %}{{ content }}{% elif message['role'] == 'assistant' %}{{ content + '\\n' }}{% endif %}{% endfor %}", + "clean_up_tokenization_spaces": false, + "eos_token": "", + "legacy": true, + "model_max_length": 1000000000000000019884624838656, + "pad_token": "", + "padding_side": "right", + "sp_model_kwargs": {}, + "spaces_between_special_tokens": false, + "split_special_tokens": false, + "tokenizer_class": "LlamaTokenizer", + "unk_token": "", + "use_default_system_prompt": false +} diff --git a/llama2_13b_peft/news_commentary_de/train_results.json b/llama2_13b_peft/news_commentary_de/train_results.json new file mode 100644 index 0000000000000000000000000000000000000000..1574d04dd4c2f1ff6d4ddf979ddb0f4aef9e188c --- /dev/null +++ b/llama2_13b_peft/news_commentary_de/train_results.json @@ -0,0 +1,7 @@ +{ + "epoch": 2.51, + "train_loss": 0.7081527805328369, + "train_runtime": 4312.5386, + "train_samples_per_second": 59.13, + "train_steps_per_second": 3.696 +} \ No newline at end of file diff --git a/llama2_13b_peft/news_commentary_de/trainer_log.jsonl b/llama2_13b_peft/news_commentary_de/trainer_log.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..be1137ec35039ee7721d558e8f2fa1a93b15a88b --- /dev/null +++ b/llama2_13b_peft/news_commentary_de/trainer_log.jsonl @@ -0,0 +1,422 @@ +{"current_steps": 10, "total_steps": 15940, "loss": 1.3994, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 5e-06, "epoch": 0.01, "percentage": 0.06, "elapsed_time": "0:00:10", "remaining_time": "4:29:17"} +{"current_steps": 20, "total_steps": 15940, "loss": 1.4561, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1e-05, "epoch": 0.01, "percentage": 0.13, "elapsed_time": "0:00:16", "remaining_time": "3:40:16"} +{"current_steps": 30, "total_steps": 15940, "loss": 1.3697, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.999990264607035e-06, "epoch": 0.02, "percentage": 0.19, "elapsed_time": "0:00:23", "remaining_time": "3:30:45"} +{"current_steps": 40, "total_steps": 15940, "loss": 1.3627, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.999961058466052e-06, "epoch": 0.03, "percentage": 0.25, "elapsed_time": "0:00:30", "remaining_time": "3:21:30"} +{"current_steps": 50, "total_steps": 15940, "loss": 1.1155, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.999912381690781e-06, "epoch": 0.03, "percentage": 0.31, "elapsed_time": "0:00:40", "remaining_time": "3:33:21"} +{"current_steps": 60, "total_steps": 15940, "loss": 0.9492, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.999844234470782e-06, "epoch": 0.04, "percentage": 0.38, "elapsed_time": "0:00:46", "remaining_time": "3:26:39"} +{"current_steps": 70, "total_steps": 15940, "loss": 0.9067, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.999756617071427e-06, "epoch": 0.04, "percentage": 0.44, "elapsed_time": "0:00:53", "remaining_time": "3:22:54"} +{"current_steps": 80, "total_steps": 15940, "loss": 0.8848, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.999649529833915e-06, "epoch": 0.05, "percentage": 0.5, "elapsed_time": "0:01:00", "remaining_time": "3:20:21"} +{"current_steps": 90, "total_steps": 15940, "loss": 0.798, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.999522973175257e-06, "epoch": 0.06, "percentage": 0.56, "elapsed_time": "0:01:10", "remaining_time": "3:27:40"} +{"current_steps": 100, "total_steps": 15940, "loss": 0.8782, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.999376947588288e-06, "epoch": 0.06, "percentage": 0.63, "elapsed_time": "0:01:17", "remaining_time": "3:24:28"} +{"current_steps": 110, "total_steps": 15940, "loss": 0.8124, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.99921145364165e-06, "epoch": 0.07, "percentage": 0.69, "elapsed_time": "0:01:26", "remaining_time": "3:26:46"} +{"current_steps": 120, "total_steps": 15940, "loss": 0.838, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.999026491979809e-06, "epoch": 0.08, "percentage": 0.75, "elapsed_time": "0:01:32", "remaining_time": "3:23:53"} +{"current_steps": 130, "total_steps": 15940, "loss": 0.8383, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.99882206332303e-06, "epoch": 0.08, "percentage": 0.82, "elapsed_time": "0:01:39", "remaining_time": "3:22:07"} +{"current_steps": 140, "total_steps": 15940, "loss": 0.8705, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.99859816846739e-06, "epoch": 0.09, "percentage": 0.88, "elapsed_time": "0:01:47", "remaining_time": "3:23:01"} +{"current_steps": 150, "total_steps": 15940, "loss": 0.7872, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.998354808284774e-06, "epoch": 0.09, "percentage": 0.94, "elapsed_time": "0:01:54", "remaining_time": "3:20:22"} +{"current_steps": 160, "total_steps": 15940, "loss": 0.789, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.998091983722862e-06, "epoch": 0.1, "percentage": 1.0, "elapsed_time": "0:02:02", "remaining_time": "3:21:56"} +{"current_steps": 170, "total_steps": 15940, "loss": 0.7749, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.997809695805136e-06, "epoch": 0.11, "percentage": 1.07, "elapsed_time": "0:02:10", "remaining_time": "3:22:13"} +{"current_steps": 180, "total_steps": 15940, "loss": 0.7935, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.99750794563087e-06, "epoch": 0.11, "percentage": 1.13, "elapsed_time": "0:02:18", "remaining_time": "3:22:32"} +{"current_steps": 190, "total_steps": 15940, "loss": 0.7817, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.997186734375124e-06, "epoch": 0.12, "percentage": 1.19, "elapsed_time": "0:02:25", "remaining_time": "3:21:34"} +{"current_steps": 200, "total_steps": 15940, "loss": 0.7429, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.996846063288746e-06, "epoch": 0.13, "percentage": 1.25, "elapsed_time": "0:02:33", "remaining_time": "3:20:50"} +{"current_steps": 200, "total_steps": 15940, "loss": null, "eval_loss": 0.7712445855140686, "predict_loss": null, "reward": null, "learning_rate": null, "epoch": 0.13, "percentage": 1.25, "elapsed_time": "0:02:33", "remaining_time": "3:20:50"} +{"current_steps": 210, "total_steps": 15940, "loss": 0.7636, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.996485933698364e-06, "epoch": 0.13, "percentage": 1.32, "elapsed_time": "0:03:46", "remaining_time": "4:42:21"} +{"current_steps": 220, "total_steps": 15940, "loss": 0.7856, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.996106347006378e-06, "epoch": 0.14, "percentage": 1.38, "elapsed_time": "0:03:52", "remaining_time": "4:36:29"} +{"current_steps": 230, "total_steps": 15940, "loss": 0.7529, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.99570730469096e-06, "epoch": 0.14, "percentage": 1.44, "elapsed_time": "0:03:58", "remaining_time": "4:31:20"} +{"current_steps": 240, "total_steps": 15940, "loss": 0.7671, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.995288808306041e-06, "epoch": 0.15, "percentage": 1.51, "elapsed_time": "0:04:07", "remaining_time": "4:30:22"} +{"current_steps": 250, "total_steps": 15940, "loss": 0.7231, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.994850859481312e-06, "epoch": 0.16, "percentage": 1.57, "elapsed_time": "0:04:16", "remaining_time": "4:27:58"} +{"current_steps": 260, "total_steps": 15940, "loss": 0.7694, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.994393459922219e-06, "epoch": 0.16, "percentage": 1.63, "elapsed_time": "0:04:24", "remaining_time": "4:25:29"} +{"current_steps": 270, "total_steps": 15940, "loss": 0.7661, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.993916611409941e-06, "epoch": 0.17, "percentage": 1.69, "elapsed_time": "0:04:30", "remaining_time": "4:21:37"} +{"current_steps": 280, "total_steps": 15940, "loss": 0.7952, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.993420315801406e-06, "epoch": 0.18, "percentage": 1.76, "elapsed_time": "0:04:37", "remaining_time": "4:18:35"} +{"current_steps": 290, "total_steps": 15940, "loss": 0.7966, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.992904575029265e-06, "epoch": 0.18, "percentage": 1.82, "elapsed_time": "0:04:44", "remaining_time": "4:16:09"} +{"current_steps": 300, "total_steps": 15940, "loss": 0.8167, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.992369391101895e-06, "epoch": 0.19, "percentage": 1.88, "elapsed_time": "0:04:51", "remaining_time": "4:13:29"} +{"current_steps": 310, "total_steps": 15940, "loss": 0.7368, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.991814766103386e-06, "epoch": 0.19, "percentage": 1.94, "elapsed_time": "0:04:58", "remaining_time": "4:11:15"} +{"current_steps": 320, "total_steps": 15940, "loss": 0.7796, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.991240702193532e-06, "epoch": 0.2, "percentage": 2.01, "elapsed_time": "0:05:06", "remaining_time": "4:09:03"} +{"current_steps": 330, "total_steps": 15940, "loss": 0.7727, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.99064720160783e-06, "epoch": 0.21, "percentage": 2.07, "elapsed_time": "0:05:15", "remaining_time": "4:09:07"} +{"current_steps": 340, "total_steps": 15940, "loss": 0.7604, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.990034266657468e-06, "epoch": 0.21, "percentage": 2.13, "elapsed_time": "0:05:24", "remaining_time": "4:07:46"} +{"current_steps": 350, "total_steps": 15940, "loss": 0.7399, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.989401899729307e-06, "epoch": 0.22, "percentage": 2.2, "elapsed_time": "0:05:31", "remaining_time": "4:06:03"} +{"current_steps": 360, "total_steps": 15940, "loss": 0.7715, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.988750103285883e-06, "epoch": 0.23, "percentage": 2.26, "elapsed_time": "0:05:39", "remaining_time": "4:04:34"} +{"current_steps": 370, "total_steps": 15940, "loss": 0.738, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.988078879865396e-06, "epoch": 0.23, "percentage": 2.32, "elapsed_time": "0:05:47", "remaining_time": "4:03:51"} +{"current_steps": 380, "total_steps": 15940, "loss": 0.8025, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.987388232081694e-06, "epoch": 0.24, "percentage": 2.38, "elapsed_time": "0:05:55", "remaining_time": "4:02:42"} +{"current_steps": 390, "total_steps": 15940, "loss": 0.7561, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.98667816262427e-06, "epoch": 0.24, "percentage": 2.45, "elapsed_time": "0:06:02", "remaining_time": "4:00:39"} +{"current_steps": 400, "total_steps": 15940, "loss": 0.7549, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.985948674258243e-06, "epoch": 0.25, "percentage": 2.51, "elapsed_time": "0:06:09", "remaining_time": "3:58:57"} +{"current_steps": 400, "total_steps": 15940, "loss": null, "eval_loss": 0.743410587310791, "predict_loss": null, "reward": null, "learning_rate": null, "epoch": 0.25, "percentage": 2.51, "elapsed_time": "0:06:09", "remaining_time": "3:58:57"} +{"current_steps": 410, "total_steps": 15940, "loss": 0.7694, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.985199769824359e-06, "epoch": 0.26, "percentage": 2.57, "elapsed_time": "0:07:22", "remaining_time": "4:39:13"} +{"current_steps": 420, "total_steps": 15940, "loss": 0.7353, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.984431452238968e-06, "epoch": 0.26, "percentage": 2.63, "elapsed_time": "0:07:29", "remaining_time": "4:36:35"} +{"current_steps": 430, "total_steps": 15940, "loss": 0.7299, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.983643724494017e-06, "epoch": 0.27, "percentage": 2.7, "elapsed_time": "0:07:35", "remaining_time": "4:33:43"} +{"current_steps": 440, "total_steps": 15940, "loss": 0.754, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.982836589657043e-06, "epoch": 0.28, "percentage": 2.76, "elapsed_time": "0:07:42", "remaining_time": "4:31:15"} +{"current_steps": 450, "total_steps": 15940, "loss": 0.7355, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.98201005087116e-06, "epoch": 0.28, "percentage": 2.82, "elapsed_time": "0:07:48", "remaining_time": "4:29:03"} +{"current_steps": 460, "total_steps": 15940, "loss": 0.7543, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.981164111355036e-06, "epoch": 0.29, "percentage": 2.89, "elapsed_time": "0:07:56", "remaining_time": "4:27:23"} +{"current_steps": 470, "total_steps": 15940, "loss": 0.7568, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.98029877440289e-06, "epoch": 0.29, "percentage": 2.95, "elapsed_time": "0:08:03", "remaining_time": "4:25:00"} +{"current_steps": 480, "total_steps": 15940, "loss": 0.7313, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.979414043384485e-06, "epoch": 0.3, "percentage": 3.01, "elapsed_time": "0:08:10", "remaining_time": "4:23:26"} +{"current_steps": 490, "total_steps": 15940, "loss": 0.7456, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.978509921745101e-06, "epoch": 0.31, "percentage": 3.07, "elapsed_time": "0:08:17", "remaining_time": "4:21:27"} +{"current_steps": 500, "total_steps": 15940, "loss": 0.7585, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.97758641300553e-06, "epoch": 0.31, "percentage": 3.14, "elapsed_time": "0:08:24", "remaining_time": "4:19:29"} +{"current_steps": 510, "total_steps": 15940, "loss": 0.7311, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.97664352076206e-06, "epoch": 0.32, "percentage": 3.2, "elapsed_time": "0:08:30", "remaining_time": "4:17:28"} +{"current_steps": 520, "total_steps": 15940, "loss": 0.7173, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.97568124868646e-06, "epoch": 0.33, "percentage": 3.26, "elapsed_time": "0:08:37", "remaining_time": "4:15:51"} +{"current_steps": 530, "total_steps": 15940, "loss": 0.7408, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.974699600525972e-06, "epoch": 0.33, "percentage": 3.32, "elapsed_time": "0:08:45", "remaining_time": "4:14:32"} +{"current_steps": 540, "total_steps": 15940, "loss": 0.757, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.973698580103286e-06, "epoch": 0.34, "percentage": 3.39, "elapsed_time": "0:08:52", "remaining_time": "4:13:06"} +{"current_steps": 550, "total_steps": 15940, "loss": 0.7717, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.972678191316533e-06, "epoch": 0.35, "percentage": 3.45, "elapsed_time": "0:08:59", "remaining_time": "4:11:22"} +{"current_steps": 560, "total_steps": 15940, "loss": 0.7314, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.971638438139266e-06, "epoch": 0.35, "percentage": 3.51, "elapsed_time": "0:09:08", "remaining_time": "4:10:58"} +{"current_steps": 570, "total_steps": 15940, "loss": 0.7112, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.97057932462045e-06, "epoch": 0.36, "percentage": 3.58, "elapsed_time": "0:09:14", "remaining_time": "4:09:21"} +{"current_steps": 580, "total_steps": 15940, "loss": 0.7802, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.96950085488444e-06, "epoch": 0.36, "percentage": 3.64, "elapsed_time": "0:09:20", "remaining_time": "4:07:31"} +{"current_steps": 590, "total_steps": 15940, "loss": 0.7472, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.968403033130963e-06, "epoch": 0.37, "percentage": 3.7, "elapsed_time": "0:09:27", "remaining_time": "4:06:11"} +{"current_steps": 600, "total_steps": 15940, "loss": 0.7552, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.967285863635112e-06, "epoch": 0.38, "percentage": 3.76, "elapsed_time": "0:09:34", "remaining_time": "4:04:48"} +{"current_steps": 600, "total_steps": 15940, "loss": null, "eval_loss": 0.733000636100769, "predict_loss": null, "reward": null, "learning_rate": null, "epoch": 0.38, "percentage": 3.76, "elapsed_time": "0:09:34", "remaining_time": "4:04:48"} +{"current_steps": 610, "total_steps": 15940, "loss": 0.7274, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.966149350747321e-06, "epoch": 0.38, "percentage": 3.83, "elapsed_time": "0:10:50", "remaining_time": "4:32:16"} +{"current_steps": 620, "total_steps": 15940, "loss": 0.7734, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.964993498893349e-06, "epoch": 0.39, "percentage": 3.89, "elapsed_time": "0:10:56", "remaining_time": "4:30:28"} +{"current_steps": 630, "total_steps": 15940, "loss": 0.7117, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.963818312574265e-06, "epoch": 0.4, "percentage": 3.95, "elapsed_time": "0:11:03", "remaining_time": "4:28:40"} +{"current_steps": 640, "total_steps": 15940, "loss": 0.7256, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.962623796366428e-06, "epoch": 0.4, "percentage": 4.02, "elapsed_time": "0:11:10", "remaining_time": "4:27:00"} +{"current_steps": 650, "total_steps": 15940, "loss": 0.764, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.961409954921472e-06, "epoch": 0.41, "percentage": 4.08, "elapsed_time": "0:11:16", "remaining_time": "4:25:11"} +{"current_steps": 660, "total_steps": 15940, "loss": 0.7385, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.96017679296629e-06, "epoch": 0.41, "percentage": 4.14, "elapsed_time": "0:11:24", "remaining_time": "4:24:00"} +{"current_steps": 670, "total_steps": 15940, "loss": 0.7386, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.958924315303005e-06, "epoch": 0.42, "percentage": 4.2, "elapsed_time": "0:11:32", "remaining_time": "4:22:59"} +{"current_steps": 680, "total_steps": 15940, "loss": 0.7013, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.95765252680896e-06, "epoch": 0.43, "percentage": 4.27, "elapsed_time": "0:11:38", "remaining_time": "4:21:18"} +{"current_steps": 690, "total_steps": 15940, "loss": 0.7104, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.956361432436705e-06, "epoch": 0.43, "percentage": 4.33, "elapsed_time": "0:11:45", "remaining_time": "4:20:01"} +{"current_steps": 700, "total_steps": 15940, "loss": 0.6988, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.95505103721396e-06, "epoch": 0.44, "percentage": 4.39, "elapsed_time": "0:11:54", "remaining_time": "4:19:25"} +{"current_steps": 710, "total_steps": 15940, "loss": 0.7177, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.953721346243613e-06, "epoch": 0.45, "percentage": 4.45, "elapsed_time": "0:12:04", "remaining_time": "4:18:50"} +{"current_steps": 720, "total_steps": 15940, "loss": 0.6804, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.952372364703688e-06, "epoch": 0.45, "percentage": 4.52, "elapsed_time": "0:12:10", "remaining_time": "4:17:32"} +{"current_steps": 730, "total_steps": 15940, "loss": 0.7432, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.95100409784733e-06, "epoch": 0.46, "percentage": 4.58, "elapsed_time": "0:12:18", "remaining_time": "4:16:31"} +{"current_steps": 740, "total_steps": 15940, "loss": 0.7521, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.949616551002787e-06, "epoch": 0.46, "percentage": 4.64, "elapsed_time": "0:12:25", "remaining_time": "4:15:08"} +{"current_steps": 750, "total_steps": 15940, "loss": 0.7624, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.948209729573384e-06, "epoch": 0.47, "percentage": 4.71, "elapsed_time": "0:12:32", "remaining_time": "4:13:54"} +{"current_steps": 760, "total_steps": 15940, "loss": 0.7619, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.946783639037503e-06, "epoch": 0.48, "percentage": 4.77, "elapsed_time": "0:12:39", "remaining_time": "4:12:40"} +{"current_steps": 770, "total_steps": 15940, "loss": 0.7159, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.945338284948568e-06, "epoch": 0.48, "percentage": 4.83, "elapsed_time": "0:12:47", "remaining_time": "4:11:56"} +{"current_steps": 780, "total_steps": 15940, "loss": 0.7621, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.943873672935014e-06, "epoch": 0.49, "percentage": 4.89, "elapsed_time": "0:12:55", "remaining_time": "4:11:17"} +{"current_steps": 790, "total_steps": 15940, "loss": 0.6923, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.94238980870027e-06, "epoch": 0.5, "percentage": 4.96, "elapsed_time": "0:13:03", "remaining_time": "4:10:30"} +{"current_steps": 800, "total_steps": 15940, "loss": 0.7265, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.940886698022733e-06, "epoch": 0.5, "percentage": 5.02, "elapsed_time": "0:13:12", "remaining_time": "4:09:50"} +{"current_steps": 800, "total_steps": 15940, "loss": null, "eval_loss": 0.7256230711936951, "predict_loss": null, "reward": null, "learning_rate": null, "epoch": 0.5, "percentage": 5.02, "elapsed_time": "0:13:12", "remaining_time": "4:09:50"} +{"current_steps": 810, "total_steps": 15940, "loss": 0.6976, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.93936434675576e-06, "epoch": 0.51, "percentage": 5.08, "elapsed_time": "0:14:31", "remaining_time": "4:31:20"} +{"current_steps": 820, "total_steps": 15940, "loss": 0.7028, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.93782276082762e-06, "epoch": 0.51, "percentage": 5.14, "elapsed_time": "0:14:38", "remaining_time": "4:30:00"} +{"current_steps": 830, "total_steps": 15940, "loss": 0.7253, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.936261946241492e-06, "epoch": 0.52, "percentage": 5.21, "elapsed_time": "0:14:45", "remaining_time": "4:28:47"} +{"current_steps": 840, "total_steps": 15940, "loss": 0.7096, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.934681909075434e-06, "epoch": 0.53, "percentage": 5.27, "elapsed_time": "0:14:55", "remaining_time": "4:28:15"} +{"current_steps": 850, "total_steps": 15940, "loss": 0.745, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.93308265548236e-06, "epoch": 0.53, "percentage": 5.33, "elapsed_time": "0:15:02", "remaining_time": "4:27:00"} +{"current_steps": 860, "total_steps": 15940, "loss": 0.7111, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.931464191690015e-06, "epoch": 0.54, "percentage": 5.4, "elapsed_time": "0:15:10", "remaining_time": "4:25:57"} +{"current_steps": 870, "total_steps": 15940, "loss": 0.7296, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.929826524000948e-06, "epoch": 0.55, "percentage": 5.46, "elapsed_time": "0:15:19", "remaining_time": "4:25:33"} +{"current_steps": 880, "total_steps": 15940, "loss": 0.7387, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.928169658792498e-06, "epoch": 0.55, "percentage": 5.52, "elapsed_time": "0:15:29", "remaining_time": "4:25:09"} +{"current_steps": 890, "total_steps": 15940, "loss": 0.7156, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.926493602516758e-06, "epoch": 0.56, "percentage": 5.58, "elapsed_time": "0:15:37", "remaining_time": "4:24:14"} +{"current_steps": 900, "total_steps": 15940, "loss": 0.7956, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.924798361700554e-06, "epoch": 0.56, "percentage": 5.65, "elapsed_time": "0:15:48", "remaining_time": "4:24:14"} +{"current_steps": 910, "total_steps": 15940, "loss": 0.7361, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.923083942945419e-06, "epoch": 0.57, "percentage": 5.71, "elapsed_time": "0:15:56", "remaining_time": "4:23:18"} +{"current_steps": 920, "total_steps": 15940, "loss": 0.7091, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.92135035292757e-06, "epoch": 0.58, "percentage": 5.77, "elapsed_time": "0:16:05", "remaining_time": "4:22:44"} +{"current_steps": 930, "total_steps": 15940, "loss": 0.6967, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.919597598397882e-06, "epoch": 0.58, "percentage": 5.83, "elapsed_time": "0:16:13", "remaining_time": "4:21:45"} +{"current_steps": 940, "total_steps": 15940, "loss": 0.7509, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.91782568618185e-06, "epoch": 0.59, "percentage": 5.9, "elapsed_time": "0:16:21", "remaining_time": "4:20:59"} +{"current_steps": 950, "total_steps": 15940, "loss": 0.6999, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.916034623179584e-06, "epoch": 0.6, "percentage": 5.96, "elapsed_time": "0:16:29", "remaining_time": "4:20:18"} +{"current_steps": 960, "total_steps": 15940, "loss": 0.7194, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.914224416365765e-06, "epoch": 0.6, "percentage": 6.02, "elapsed_time": "0:16:38", "remaining_time": "4:19:47"} +{"current_steps": 970, "total_steps": 15940, "loss": 0.705, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.91239507278962e-06, "epoch": 0.61, "percentage": 6.09, "elapsed_time": "0:16:45", "remaining_time": "4:18:42"} +{"current_steps": 980, "total_steps": 15940, "loss": 0.7314, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.910546599574903e-06, "epoch": 0.61, "percentage": 6.15, "elapsed_time": "0:16:53", "remaining_time": "4:17:50"} +{"current_steps": 990, "total_steps": 15940, "loss": 0.7549, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.908679003919856e-06, "epoch": 0.62, "percentage": 6.21, "elapsed_time": "0:17:00", "remaining_time": "4:16:49"} +{"current_steps": 1000, "total_steps": 15940, "loss": 0.7524, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.906792293097194e-06, "epoch": 0.63, "percentage": 6.27, "elapsed_time": "0:17:08", "remaining_time": "4:16:10"} +{"current_steps": 1000, "total_steps": 15940, "loss": null, "eval_loss": 0.7200015187263489, "predict_loss": null, "reward": null, "learning_rate": null, "epoch": 0.63, "percentage": 6.27, "elapsed_time": "0:17:08", "remaining_time": "4:16:10"} +{"current_steps": 1010, "total_steps": 15940, "loss": 0.7218, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.904886474454063e-06, "epoch": 0.63, "percentage": 6.34, "elapsed_time": "0:18:26", "remaining_time": "4:32:34"} +{"current_steps": 1020, "total_steps": 15940, "loss": 0.7171, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.90296155541202e-06, "epoch": 0.64, "percentage": 6.4, "elapsed_time": "0:18:33", "remaining_time": "4:31:23"} +{"current_steps": 1030, "total_steps": 15940, "loss": 0.758, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.901017543467005e-06, "epoch": 0.65, "percentage": 6.46, "elapsed_time": "0:18:40", "remaining_time": "4:30:21"} +{"current_steps": 1040, "total_steps": 15940, "loss": 0.7121, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.899054446189305e-06, "epoch": 0.65, "percentage": 6.52, "elapsed_time": "0:18:47", "remaining_time": "4:29:18"} +{"current_steps": 1050, "total_steps": 15940, "loss": 0.7088, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.897072271223526e-06, "epoch": 0.66, "percentage": 6.59, "elapsed_time": "0:18:54", "remaining_time": "4:28:07"} +{"current_steps": 1060, "total_steps": 15940, "loss": 0.7804, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.895071026288574e-06, "epoch": 0.66, "percentage": 6.65, "elapsed_time": "0:19:01", "remaining_time": "4:27:00"} +{"current_steps": 1070, "total_steps": 15940, "loss": 0.6935, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.893050719177608e-06, "epoch": 0.67, "percentage": 6.71, "elapsed_time": "0:19:07", "remaining_time": "4:25:52"} +{"current_steps": 1080, "total_steps": 15940, "loss": 0.6894, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.891011357758022e-06, "epoch": 0.68, "percentage": 6.78, "elapsed_time": "0:19:14", "remaining_time": "4:24:42"} +{"current_steps": 1090, "total_steps": 15940, "loss": 0.7244, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.888952949971411e-06, "epoch": 0.68, "percentage": 6.84, "elapsed_time": "0:19:22", "remaining_time": "4:24:00"} +{"current_steps": 1100, "total_steps": 15940, "loss": 0.8156, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.886875503833537e-06, "epoch": 0.69, "percentage": 6.9, "elapsed_time": "0:19:29", "remaining_time": "4:22:51"} +{"current_steps": 1110, "total_steps": 15940, "loss": 0.7478, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.884779027434304e-06, "epoch": 0.7, "percentage": 6.96, "elapsed_time": "0:19:35", "remaining_time": "4:21:50"} +{"current_steps": 1120, "total_steps": 15940, "loss": 0.7187, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.882663528937716e-06, "epoch": 0.7, "percentage": 7.03, "elapsed_time": "0:19:42", "remaining_time": "4:20:47"} +{"current_steps": 1130, "total_steps": 15940, "loss": 0.7145, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.880529016581863e-06, "epoch": 0.71, "percentage": 7.09, "elapsed_time": "0:19:51", "remaining_time": "4:20:11"} +{"current_steps": 1140, "total_steps": 15940, "loss": 0.731, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.878375498678869e-06, "epoch": 0.72, "percentage": 7.15, "elapsed_time": "0:19:57", "remaining_time": "4:19:07"} +{"current_steps": 1150, "total_steps": 15940, "loss": 0.7323, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.876202983614868e-06, "epoch": 0.72, "percentage": 7.21, "elapsed_time": "0:20:06", "remaining_time": "4:18:33"} +{"current_steps": 1160, "total_steps": 15940, "loss": 0.6757, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.874011479849981e-06, "epoch": 0.73, "percentage": 7.28, "elapsed_time": "0:20:12", "remaining_time": "4:17:33"} +{"current_steps": 1170, "total_steps": 15940, "loss": 0.7258, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.871800995918264e-06, "epoch": 0.73, "percentage": 7.34, "elapsed_time": "0:20:19", "remaining_time": "4:16:40"} +{"current_steps": 1180, "total_steps": 15940, "loss": 0.7334, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.86957154042769e-06, "epoch": 0.74, "percentage": 7.4, "elapsed_time": "0:20:26", "remaining_time": "4:15:43"} +{"current_steps": 1190, "total_steps": 15940, "loss": 0.7358, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.867323122060108e-06, "epoch": 0.75, "percentage": 7.47, "elapsed_time": "0:20:36", "remaining_time": "4:15:30"} +{"current_steps": 1200, "total_steps": 15940, "loss": 0.6976, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.865055749571215e-06, "epoch": 0.75, "percentage": 7.53, "elapsed_time": "0:20:45", "remaining_time": "4:14:58"} +{"current_steps": 1200, "total_steps": 15940, "loss": null, "eval_loss": 0.7151169180870056, "predict_loss": null, "reward": null, "learning_rate": null, "epoch": 0.75, "percentage": 7.53, "elapsed_time": "0:20:45", "remaining_time": "4:14:58"} +{"current_steps": 1210, "total_steps": 15940, "loss": 0.6983, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.862769431790513e-06, "epoch": 0.76, "percentage": 7.59, "elapsed_time": "0:21:57", "remaining_time": "4:27:14"} +{"current_steps": 1220, "total_steps": 15940, "loss": 0.7171, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.860464177621286e-06, "epoch": 0.77, "percentage": 7.65, "elapsed_time": "0:22:04", "remaining_time": "4:26:20"} +{"current_steps": 1230, "total_steps": 15940, "loss": 0.7206, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.858139996040554e-06, "epoch": 0.77, "percentage": 7.72, "elapsed_time": "0:22:14", "remaining_time": "4:26:04"} +{"current_steps": 1240, "total_steps": 15940, "loss": 0.7368, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.855796896099044e-06, "epoch": 0.78, "percentage": 7.78, "elapsed_time": "0:22:21", "remaining_time": "4:24:58"} +{"current_steps": 1250, "total_steps": 15940, "loss": 0.7372, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.85343488692116e-06, "epoch": 0.78, "percentage": 7.84, "elapsed_time": "0:22:27", "remaining_time": "4:23:56"} +{"current_steps": 1260, "total_steps": 15940, "loss": 0.7373, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.851053977704931e-06, "epoch": 0.79, "percentage": 7.9, "elapsed_time": "0:22:34", "remaining_time": "4:22:57"} +{"current_steps": 1270, "total_steps": 15940, "loss": 0.7608, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.848654177721999e-06, "epoch": 0.8, "percentage": 7.97, "elapsed_time": "0:22:42", "remaining_time": "4:22:17"} +{"current_steps": 1280, "total_steps": 15940, "loss": 0.7227, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.846235496317556e-06, "epoch": 0.8, "percentage": 8.03, "elapsed_time": "0:22:49", "remaining_time": "4:21:23"} +{"current_steps": 1290, "total_steps": 15940, "loss": 0.7415, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.843797942910328e-06, "epoch": 0.81, "percentage": 8.09, "elapsed_time": "0:22:56", "remaining_time": "4:20:29"} +{"current_steps": 1300, "total_steps": 15940, "loss": 0.7206, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.841341526992536e-06, "epoch": 0.82, "percentage": 8.16, "elapsed_time": "0:23:03", "remaining_time": "4:19:39"} +{"current_steps": 1310, "total_steps": 15940, "loss": 0.6704, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.838866258129847e-06, "epoch": 0.82, "percentage": 8.22, "elapsed_time": "0:23:10", "remaining_time": "4:18:53"} +{"current_steps": 1320, "total_steps": 15940, "loss": 0.7159, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.836372145961346e-06, "epoch": 0.83, "percentage": 8.28, "elapsed_time": "0:23:19", "remaining_time": "4:18:19"} +{"current_steps": 1330, "total_steps": 15940, "loss": 0.6916, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.833859200199498e-06, "epoch": 0.83, "percentage": 8.34, "elapsed_time": "0:23:27", "remaining_time": "4:17:44"} +{"current_steps": 1340, "total_steps": 15940, "loss": 0.7568, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.83132743063011e-06, "epoch": 0.84, "percentage": 8.41, "elapsed_time": "0:23:34", "remaining_time": "4:16:51"} +{"current_steps": 1350, "total_steps": 15940, "loss": 0.7017, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.82877684711229e-06, "epoch": 0.85, "percentage": 8.47, "elapsed_time": "0:23:40", "remaining_time": "4:15:55"} +{"current_steps": 1360, "total_steps": 15940, "loss": 0.7127, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.826207459578412e-06, "epoch": 0.85, "percentage": 8.53, "elapsed_time": "0:23:48", "remaining_time": "4:15:13"} +{"current_steps": 1370, "total_steps": 15940, "loss": 0.7013, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.823619278034073e-06, "epoch": 0.86, "percentage": 8.59, "elapsed_time": "0:23:56", "remaining_time": "4:14:33"} +{"current_steps": 1380, "total_steps": 15940, "loss": 0.6942, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.821012312558059e-06, "epoch": 0.87, "percentage": 8.66, "elapsed_time": "0:24:04", "remaining_time": "4:13:56"} +{"current_steps": 1390, "total_steps": 15940, "loss": 0.7013, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.818386573302305e-06, "epoch": 0.87, "percentage": 8.72, "elapsed_time": "0:24:12", "remaining_time": "4:13:20"} +{"current_steps": 1400, "total_steps": 15940, "loss": 0.7408, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.815742070491852e-06, "epoch": 0.88, "percentage": 8.78, "elapsed_time": "0:24:18", "remaining_time": "4:12:31"} +{"current_steps": 1400, "total_steps": 15940, "loss": null, "eval_loss": 0.7116020917892456, "predict_loss": null, "reward": null, "learning_rate": null, "epoch": 0.88, "percentage": 8.78, "elapsed_time": "0:24:18", "remaining_time": "4:12:31"} +{"current_steps": 1410, "total_steps": 15940, "loss": 0.7105, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.81307881442481e-06, "epoch": 0.88, "percentage": 8.85, "elapsed_time": "0:25:30", "remaining_time": "4:22:52"} +{"current_steps": 1420, "total_steps": 15940, "loss": 0.6994, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.810396815472316e-06, "epoch": 0.89, "percentage": 8.91, "elapsed_time": "0:25:37", "remaining_time": "4:21:56"} +{"current_steps": 1430, "total_steps": 15940, "loss": 0.7459, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.807696084078494e-06, "epoch": 0.9, "percentage": 8.97, "elapsed_time": "0:25:44", "remaining_time": "4:21:16"} +{"current_steps": 1440, "total_steps": 15940, "loss": 0.7048, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.804976630760419e-06, "epoch": 0.9, "percentage": 9.03, "elapsed_time": "0:25:51", "remaining_time": "4:20:19"} +{"current_steps": 1450, "total_steps": 15940, "loss": 0.6975, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.802238466108068e-06, "epoch": 0.91, "percentage": 9.1, "elapsed_time": "0:25:57", "remaining_time": "4:19:24"} +{"current_steps": 1460, "total_steps": 15940, "loss": 0.737, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.799481600784286e-06, "epoch": 0.92, "percentage": 9.16, "elapsed_time": "0:26:04", "remaining_time": "4:18:36"} +{"current_steps": 1470, "total_steps": 15940, "loss": 0.7236, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.796706045524738e-06, "epoch": 0.92, "percentage": 9.22, "elapsed_time": "0:26:13", "remaining_time": "4:18:10"} +{"current_steps": 1480, "total_steps": 15940, "loss": 0.6984, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.793911811137874e-06, "epoch": 0.93, "percentage": 9.28, "elapsed_time": "0:26:20", "remaining_time": "4:17:20"} +{"current_steps": 1490, "total_steps": 15940, "loss": 0.8019, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.791098908504884e-06, "epoch": 0.93, "percentage": 9.35, "elapsed_time": "0:26:27", "remaining_time": "4:16:34"} +{"current_steps": 1500, "total_steps": 15940, "loss": 0.7114, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.788267348579649e-06, "epoch": 0.94, "percentage": 9.41, "elapsed_time": "0:26:34", "remaining_time": "4:15:52"} +{"current_steps": 1510, "total_steps": 15940, "loss": 0.7222, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.78541714238871e-06, "epoch": 0.95, "percentage": 9.47, "elapsed_time": "0:26:42", "remaining_time": "4:15:17"} +{"current_steps": 1520, "total_steps": 15940, "loss": 0.7434, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.782548301031218e-06, "epoch": 0.95, "percentage": 9.54, "elapsed_time": "0:26:49", "remaining_time": "4:14:27"} +{"current_steps": 1530, "total_steps": 15940, "loss": 0.7193, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.77966083567889e-06, "epoch": 0.96, "percentage": 9.6, "elapsed_time": "0:26:58", "remaining_time": "4:14:04"} +{"current_steps": 1540, "total_steps": 15940, "loss": 0.7763, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.776754757575975e-06, "epoch": 0.97, "percentage": 9.66, "elapsed_time": "0:27:07", "remaining_time": "4:13:34"} +{"current_steps": 1550, "total_steps": 15940, "loss": 0.7494, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.773830078039193e-06, "epoch": 0.97, "percentage": 9.72, "elapsed_time": "0:27:14", "remaining_time": "4:12:56"} +{"current_steps": 1560, "total_steps": 15940, "loss": 0.7078, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.77088680845771e-06, "epoch": 0.98, "percentage": 9.79, "elapsed_time": "0:27:22", "remaining_time": "4:12:21"} +{"current_steps": 1570, "total_steps": 15940, "loss": 0.7468, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.767924960293076e-06, "epoch": 0.98, "percentage": 9.85, "elapsed_time": "0:27:30", "remaining_time": "4:11:49"} +{"current_steps": 1580, "total_steps": 15940, "loss": 0.7502, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.764944545079197e-06, "epoch": 0.99, "percentage": 9.91, "elapsed_time": "0:27:38", "remaining_time": "4:11:11"} +{"current_steps": 1590, "total_steps": 15940, "loss": 0.7337, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.761945574422276e-06, "epoch": 1.0, "percentage": 9.97, "elapsed_time": "0:27:44", "remaining_time": "4:10:24"} +{"current_steps": 1600, "total_steps": 15940, "loss": 0.701, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.758928060000779e-06, "epoch": 1.0, "percentage": 10.04, "elapsed_time": "0:27:52", "remaining_time": "4:09:48"} +{"current_steps": 1600, "total_steps": 15940, "loss": null, "eval_loss": 0.7084596157073975, "predict_loss": null, "reward": null, "learning_rate": null, "epoch": 1.0, "percentage": 10.04, "elapsed_time": "0:27:52", "remaining_time": "4:09:48"} +{"current_steps": 1610, "total_steps": 15940, "loss": 0.7014, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.755892013565377e-06, "epoch": 1.01, "percentage": 10.1, "elapsed_time": "0:29:04", "remaining_time": "4:18:50"} +{"current_steps": 1620, "total_steps": 15940, "loss": 0.7256, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.752837446938915e-06, "epoch": 1.02, "percentage": 10.16, "elapsed_time": "0:29:12", "remaining_time": "4:18:07"} +{"current_steps": 1630, "total_steps": 15940, "loss": 0.7268, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.749764372016355e-06, "epoch": 1.02, "percentage": 10.23, "elapsed_time": "0:29:19", "remaining_time": "4:17:23"} +{"current_steps": 1640, "total_steps": 15940, "loss": 0.6968, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.746672800764734e-06, "epoch": 1.03, "percentage": 10.29, "elapsed_time": "0:29:25", "remaining_time": "4:16:37"} +{"current_steps": 1650, "total_steps": 15940, "loss": 0.7087, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.743562745223118e-06, "epoch": 1.04, "percentage": 10.35, "elapsed_time": "0:29:32", "remaining_time": "4:15:51"} +{"current_steps": 1660, "total_steps": 15940, "loss": 0.7199, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.740434217502549e-06, "epoch": 1.04, "percentage": 10.41, "elapsed_time": "0:29:40", "remaining_time": "4:15:17"} +{"current_steps": 1670, "total_steps": 15940, "loss": 0.687, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.737287229786007e-06, "epoch": 1.05, "percentage": 10.48, "elapsed_time": "0:29:47", "remaining_time": "4:14:33"} +{"current_steps": 1680, "total_steps": 15940, "loss": 0.7003, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.734121794328358e-06, "epoch": 1.05, "percentage": 10.54, "elapsed_time": "0:29:53", "remaining_time": "4:13:45"} +{"current_steps": 1690, "total_steps": 15940, "loss": 0.7329, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.730937923456303e-06, "epoch": 1.06, "percentage": 10.6, "elapsed_time": "0:30:00", "remaining_time": "4:12:57"} +{"current_steps": 1700, "total_steps": 15940, "loss": 0.6924, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.727735629568335e-06, "epoch": 1.07, "percentage": 10.66, "elapsed_time": "0:30:07", "remaining_time": "4:12:24"} +{"current_steps": 1710, "total_steps": 15940, "loss": 0.7219, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.724514925134696e-06, "epoch": 1.07, "percentage": 10.73, "elapsed_time": "0:30:15", "remaining_time": "4:11:44"} +{"current_steps": 1720, "total_steps": 15940, "loss": 0.6741, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.721275822697307e-06, "epoch": 1.08, "percentage": 10.79, "elapsed_time": "0:30:21", "remaining_time": "4:10:59"} +{"current_steps": 1730, "total_steps": 15940, "loss": 0.7047, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.718018334869748e-06, "epoch": 1.09, "percentage": 10.85, "elapsed_time": "0:30:29", "remaining_time": "4:10:28"} +{"current_steps": 1740, "total_steps": 15940, "loss": 0.7156, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.714742474337187e-06, "epoch": 1.09, "percentage": 10.92, "elapsed_time": "0:30:37", "remaining_time": "4:09:58"} +{"current_steps": 1750, "total_steps": 15940, "loss": 0.6887, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.711448253856336e-06, "epoch": 1.1, "percentage": 10.98, "elapsed_time": "0:30:47", "remaining_time": "4:09:40"} +{"current_steps": 1760, "total_steps": 15940, "loss": 0.7373, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.708135686255415e-06, "epoch": 1.1, "percentage": 11.04, "elapsed_time": "0:30:55", "remaining_time": "4:09:10"} +{"current_steps": 1770, "total_steps": 15940, "loss": 0.6652, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.704804784434077e-06, "epoch": 1.11, "percentage": 11.1, "elapsed_time": "0:31:02", "remaining_time": "4:08:29"} +{"current_steps": 1780, "total_steps": 15940, "loss": 0.682, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.701455561363378e-06, "epoch": 1.12, "percentage": 11.17, "elapsed_time": "0:31:09", "remaining_time": "4:07:48"} +{"current_steps": 1790, "total_steps": 15940, "loss": 0.6844, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.698088030085721e-06, "epoch": 1.12, "percentage": 11.23, "elapsed_time": "0:31:17", "remaining_time": "4:07:20"} +{"current_steps": 1800, "total_steps": 15940, "loss": 0.7084, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.694702203714801e-06, "epoch": 1.13, "percentage": 11.29, "elapsed_time": "0:31:23", "remaining_time": "4:06:36"} +{"current_steps": 1800, "total_steps": 15940, "loss": null, "eval_loss": 0.705936074256897, "predict_loss": null, "reward": null, "learning_rate": null, "epoch": 1.13, "percentage": 11.29, "elapsed_time": "0:31:23", "remaining_time": "4:06:36"} +{"current_steps": 1810, "total_steps": 15940, "loss": 0.6897, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.691298095435559e-06, "epoch": 1.14, "percentage": 11.36, "elapsed_time": "0:32:36", "remaining_time": "4:14:32"} +{"current_steps": 1820, "total_steps": 15940, "loss": 0.6851, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.687875718504126e-06, "epoch": 1.14, "percentage": 11.42, "elapsed_time": "0:32:43", "remaining_time": "4:13:54"} +{"current_steps": 1830, "total_steps": 15940, "loss": 0.7132, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.684435086247777e-06, "epoch": 1.15, "percentage": 11.48, "elapsed_time": "0:32:51", "remaining_time": "4:13:21"} +{"current_steps": 1840, "total_steps": 15940, "loss": 0.7129, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.680976212064875e-06, "epoch": 1.15, "percentage": 11.54, "elapsed_time": "0:32:58", "remaining_time": "4:12:40"} +{"current_steps": 1850, "total_steps": 15940, "loss": 0.6907, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.677499109424818e-06, "epoch": 1.16, "percentage": 11.61, "elapsed_time": "0:33:05", "remaining_time": "4:11:59"} +{"current_steps": 1860, "total_steps": 15940, "loss": 0.7327, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.674003791867993e-06, "epoch": 1.17, "percentage": 11.67, "elapsed_time": "0:33:12", "remaining_time": "4:11:22"} +{"current_steps": 1870, "total_steps": 15940, "loss": 0.7304, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.670490273005713e-06, "epoch": 1.17, "percentage": 11.73, "elapsed_time": "0:33:20", "remaining_time": "4:10:50"} +{"current_steps": 1880, "total_steps": 15940, "loss": 0.7076, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.666958566520175e-06, "epoch": 1.18, "percentage": 11.79, "elapsed_time": "0:33:27", "remaining_time": "4:10:12"} +{"current_steps": 1890, "total_steps": 15940, "loss": 0.691, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.663408686164399e-06, "epoch": 1.19, "percentage": 11.86, "elapsed_time": "0:33:33", "remaining_time": "4:09:30"} +{"current_steps": 1900, "total_steps": 15940, "loss": 0.74, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.659840645762176e-06, "epoch": 1.19, "percentage": 11.92, "elapsed_time": "0:33:40", "remaining_time": "4:08:51"} +{"current_steps": 1910, "total_steps": 15940, "loss": 0.7295, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.656254459208015e-06, "epoch": 1.2, "percentage": 11.98, "elapsed_time": "0:33:49", "remaining_time": "4:08:25"} +{"current_steps": 1920, "total_steps": 15940, "loss": 0.651, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.652650140467094e-06, "epoch": 1.2, "percentage": 12.05, "elapsed_time": "0:33:58", "remaining_time": "4:08:05"} +{"current_steps": 1930, "total_steps": 15940, "loss": 0.7028, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.649027703575193e-06, "epoch": 1.21, "percentage": 12.11, "elapsed_time": "0:34:06", "remaining_time": "4:07:32"} +{"current_steps": 1940, "total_steps": 15940, "loss": 0.7179, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.645387162638652e-06, "epoch": 1.22, "percentage": 12.17, "elapsed_time": "0:34:15", "remaining_time": "4:07:10"} +{"current_steps": 1950, "total_steps": 15940, "loss": 0.6872, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.641728531834313e-06, "epoch": 1.22, "percentage": 12.23, "elapsed_time": "0:34:21", "remaining_time": "4:06:28"} +{"current_steps": 1960, "total_steps": 15940, "loss": 0.6991, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.638051825409454e-06, "epoch": 1.23, "percentage": 12.3, "elapsed_time": "0:34:28", "remaining_time": "4:05:51"} +{"current_steps": 1970, "total_steps": 15940, "loss": 0.7183, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.634357057681749e-06, "epoch": 1.24, "percentage": 12.36, "elapsed_time": "0:34:35", "remaining_time": "4:05:17"} +{"current_steps": 1980, "total_steps": 15940, "loss": 0.6795, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.630644243039207e-06, "epoch": 1.24, "percentage": 12.42, "elapsed_time": "0:34:42", "remaining_time": "4:04:41"} +{"current_steps": 1990, "total_steps": 15940, "loss": 0.7075, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.62691339594011e-06, "epoch": 1.25, "percentage": 12.48, "elapsed_time": "0:34:48", "remaining_time": "4:04:03"} +{"current_steps": 2000, "total_steps": 15940, "loss": 0.6999, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.623164530912963e-06, "epoch": 1.25, "percentage": 12.55, "elapsed_time": "0:34:56", "remaining_time": "4:03:33"} +{"current_steps": 2000, "total_steps": 15940, "loss": null, "eval_loss": 0.7040402293205261, "predict_loss": null, "reward": null, "learning_rate": null, "epoch": 1.25, "percentage": 12.55, "elapsed_time": "0:34:56", "remaining_time": "4:03:33"} +{"current_steps": 2010, "total_steps": 15940, "loss": 0.6947, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.619397662556434e-06, "epoch": 1.26, "percentage": 12.61, "elapsed_time": "0:36:08", "remaining_time": "4:10:30"} +{"current_steps": 2020, "total_steps": 15940, "loss": 0.7102, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.615612805539305e-06, "epoch": 1.27, "percentage": 12.67, "elapsed_time": "0:36:15", "remaining_time": "4:09:51"} +{"current_steps": 2030, "total_steps": 15940, "loss": 0.7068, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.6118099746004e-06, "epoch": 1.27, "percentage": 12.74, "elapsed_time": "0:36:25", "remaining_time": "4:09:32"} +{"current_steps": 2040, "total_steps": 15940, "loss": 0.6528, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.607989184548544e-06, "epoch": 1.28, "percentage": 12.8, "elapsed_time": "0:36:31", "remaining_time": "4:08:51"} +{"current_steps": 2050, "total_steps": 15940, "loss": 0.6838, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.604150450262488e-06, "epoch": 1.29, "percentage": 12.86, "elapsed_time": "0:36:38", "remaining_time": "4:08:13"} +{"current_steps": 2060, "total_steps": 15940, "loss": 0.6908, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.600293786690873e-06, "epoch": 1.29, "percentage": 12.92, "elapsed_time": "0:36:45", "remaining_time": "4:07:38"} +{"current_steps": 2070, "total_steps": 15940, "loss": 0.7153, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.596419208852152e-06, "epoch": 1.3, "percentage": 12.99, "elapsed_time": "0:36:52", "remaining_time": "4:07:01"} +{"current_steps": 2080, "total_steps": 15940, "loss": 0.67, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.592526731834536e-06, "epoch": 1.3, "percentage": 13.05, "elapsed_time": "0:36:58", "remaining_time": "4:06:21"} +{"current_steps": 2090, "total_steps": 15940, "loss": 0.705, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.588616370795947e-06, "epoch": 1.31, "percentage": 13.11, "elapsed_time": "0:37:05", "remaining_time": "4:05:50"} +{"current_steps": 2100, "total_steps": 15940, "loss": 0.7037, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.584688140963945e-06, "epoch": 1.32, "percentage": 13.17, "elapsed_time": "0:37:13", "remaining_time": "4:05:22"} +{"current_steps": 2110, "total_steps": 15940, "loss": 0.7199, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.580742057635672e-06, "epoch": 1.32, "percentage": 13.24, "elapsed_time": "0:37:22", "remaining_time": "4:04:56"} +{"current_steps": 2120, "total_steps": 15940, "loss": 0.7098, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.576778136177798e-06, "epoch": 1.33, "percentage": 13.3, "elapsed_time": "0:37:30", "remaining_time": "4:04:31"} +{"current_steps": 2130, "total_steps": 15940, "loss": 0.7109, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.572796392026455e-06, "epoch": 1.34, "percentage": 13.36, "elapsed_time": "0:37:39", "remaining_time": "4:04:06"} +{"current_steps": 2140, "total_steps": 15940, "loss": 0.693, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.568796840687184e-06, "epoch": 1.34, "percentage": 13.43, "elapsed_time": "0:37:45", "remaining_time": "4:03:30"} +{"current_steps": 2150, "total_steps": 15940, "loss": 0.6679, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.564779497734864e-06, "epoch": 1.35, "percentage": 13.49, "elapsed_time": "0:37:53", "remaining_time": "4:03:00"} +{"current_steps": 2160, "total_steps": 15940, "loss": 0.6573, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.56074437881366e-06, "epoch": 1.36, "percentage": 13.55, "elapsed_time": "0:37:59", "remaining_time": "4:02:24"} +{"current_steps": 2170, "total_steps": 15940, "loss": 0.705, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.55669149963696e-06, "epoch": 1.36, "percentage": 13.61, "elapsed_time": "0:38:07", "remaining_time": "4:01:57"} +{"current_steps": 2180, "total_steps": 15940, "loss": 0.6932, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.552620875987312e-06, "epoch": 1.37, "percentage": 13.68, "elapsed_time": "0:38:14", "remaining_time": "4:01:22"} +{"current_steps": 2190, "total_steps": 15940, "loss": 0.6616, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.548532523716366e-06, "epoch": 1.37, "percentage": 13.74, "elapsed_time": "0:38:20", "remaining_time": "4:00:44"} +{"current_steps": 2200, "total_steps": 15940, "loss": 0.7182, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.544426458744805e-06, "epoch": 1.38, "percentage": 13.8, "elapsed_time": "0:38:26", "remaining_time": "4:00:07"} +{"current_steps": 2200, "total_steps": 15940, "loss": null, "eval_loss": 0.7022137641906738, "predict_loss": null, "reward": null, "learning_rate": null, "epoch": 1.38, "percentage": 13.8, "elapsed_time": "0:38:26", "remaining_time": "4:00:07"} +{"current_steps": 2210, "total_steps": 15940, "loss": 0.6878, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.540302697062294e-06, "epoch": 1.39, "percentage": 13.86, "elapsed_time": "0:39:39", "remaining_time": "4:06:22"} +{"current_steps": 2220, "total_steps": 15940, "loss": 0.6979, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.536161254727407e-06, "epoch": 1.39, "percentage": 13.93, "elapsed_time": "0:39:47", "remaining_time": "4:05:55"} +{"current_steps": 2230, "total_steps": 15940, "loss": 0.6749, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.532002147867575e-06, "epoch": 1.4, "percentage": 13.99, "elapsed_time": "0:39:55", "remaining_time": "4:05:27"} +{"current_steps": 2240, "total_steps": 15940, "loss": 0.6987, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.527825392679012e-06, "epoch": 1.41, "percentage": 14.05, "elapsed_time": "0:40:06", "remaining_time": "4:05:19"} +{"current_steps": 2250, "total_steps": 15940, "loss": 0.6888, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.523631005426658e-06, "epoch": 1.41, "percentage": 14.12, "elapsed_time": "0:40:13", "remaining_time": "4:04:46"} +{"current_steps": 2260, "total_steps": 15940, "loss": 0.6471, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.51941900244412e-06, "epoch": 1.42, "percentage": 14.18, "elapsed_time": "0:40:19", "remaining_time": "4:04:07"} +{"current_steps": 2270, "total_steps": 15940, "loss": 0.6689, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.515189400133594e-06, "epoch": 1.42, "percentage": 14.24, "elapsed_time": "0:40:27", "remaining_time": "4:03:38"} +{"current_steps": 2280, "total_steps": 15940, "loss": 0.7001, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.510942214965819e-06, "epoch": 1.43, "percentage": 14.3, "elapsed_time": "0:40:35", "remaining_time": "4:03:13"} +{"current_steps": 2290, "total_steps": 15940, "loss": 0.6999, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.506677463480003e-06, "epoch": 1.44, "percentage": 14.37, "elapsed_time": "0:40:43", "remaining_time": "4:02:47"} +{"current_steps": 2300, "total_steps": 15940, "loss": 0.7008, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.50239516228376e-06, "epoch": 1.44, "percentage": 14.43, "elapsed_time": "0:40:50", "remaining_time": "4:02:11"} +{"current_steps": 2310, "total_steps": 15940, "loss": 0.7122, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.49809532805304e-06, "epoch": 1.45, "percentage": 14.49, "elapsed_time": "0:40:59", "remaining_time": "4:01:49"} +{"current_steps": 2320, "total_steps": 15940, "loss": 0.7106, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.493777977532072e-06, "epoch": 1.46, "percentage": 14.55, "elapsed_time": "0:41:06", "remaining_time": "4:01:20"} +{"current_steps": 2330, "total_steps": 15940, "loss": 0.6739, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.489443127533304e-06, "epoch": 1.46, "percentage": 14.62, "elapsed_time": "0:41:12", "remaining_time": "4:00:44"} +{"current_steps": 2340, "total_steps": 15940, "loss": 0.6888, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.485090794937319e-06, "epoch": 1.47, "percentage": 14.68, "elapsed_time": "0:41:22", "remaining_time": "4:00:25"} +{"current_steps": 2350, "total_steps": 15940, "loss": 0.6986, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.480720996692783e-06, "epoch": 1.47, "percentage": 14.74, "elapsed_time": "0:41:28", "remaining_time": "3:59:50"} +{"current_steps": 2360, "total_steps": 15940, "loss": 0.7314, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.476333749816382e-06, "epoch": 1.48, "percentage": 14.81, "elapsed_time": "0:41:36", "remaining_time": "3:59:24"} +{"current_steps": 2370, "total_steps": 15940, "loss": 0.6602, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.47192907139274e-06, "epoch": 1.49, "percentage": 14.87, "elapsed_time": "0:41:42", "remaining_time": "3:58:49"} +{"current_steps": 2380, "total_steps": 15940, "loss": 0.6454, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.46750697857437e-06, "epoch": 1.49, "percentage": 14.93, "elapsed_time": "0:41:51", "remaining_time": "3:58:27"} +{"current_steps": 2390, "total_steps": 15940, "loss": 0.6499, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.463067488581598e-06, "epoch": 1.5, "percentage": 14.99, "elapsed_time": "0:41:58", "remaining_time": "3:57:59"} +{"current_steps": 2400, "total_steps": 15940, "loss": 0.7267, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.45861061870249e-06, "epoch": 1.51, "percentage": 15.06, "elapsed_time": "0:42:06", "remaining_time": "3:57:35"} +{"current_steps": 2400, "total_steps": 15940, "loss": null, "eval_loss": 0.6993948817253113, "predict_loss": null, "reward": null, "learning_rate": null, "epoch": 1.51, "percentage": 15.06, "elapsed_time": "0:42:06", "remaining_time": "3:57:35"} +{"current_steps": 2410, "total_steps": 15940, "loss": 0.6934, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.454136386292804e-06, "epoch": 1.51, "percentage": 15.12, "elapsed_time": "0:43:18", "remaining_time": "4:03:10"} +{"current_steps": 2420, "total_steps": 15940, "loss": 0.7095, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.449644808775902e-06, "epoch": 1.52, "percentage": 15.18, "elapsed_time": "0:43:26", "remaining_time": "4:02:40"} +{"current_steps": 2430, "total_steps": 15940, "loss": 0.6626, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.445135903642693e-06, "epoch": 1.52, "percentage": 15.24, "elapsed_time": "0:43:32", "remaining_time": "4:02:05"} +{"current_steps": 2440, "total_steps": 15940, "loss": 0.6513, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.440609688451561e-06, "epoch": 1.53, "percentage": 15.31, "elapsed_time": "0:43:40", "remaining_time": "4:01:39"} +{"current_steps": 2450, "total_steps": 15940, "loss": 0.711, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.4360661808283e-06, "epoch": 1.54, "percentage": 15.37, "elapsed_time": "0:43:47", "remaining_time": "4:01:06"} +{"current_steps": 2460, "total_steps": 15940, "loss": 0.6991, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.431505398466045e-06, "epoch": 1.54, "percentage": 15.43, "elapsed_time": "0:43:53", "remaining_time": "4:00:32"} +{"current_steps": 2470, "total_steps": 15940, "loss": 0.7073, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.426927359125195e-06, "epoch": 1.55, "percentage": 15.5, "elapsed_time": "0:44:01", "remaining_time": "4:00:07"} +{"current_steps": 2480, "total_steps": 15940, "loss": 0.6557, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.422332080633361e-06, "epoch": 1.56, "percentage": 15.56, "elapsed_time": "0:44:08", "remaining_time": "3:59:33"} +{"current_steps": 2490, "total_steps": 15940, "loss": 0.6786, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.417719580885275e-06, "epoch": 1.56, "percentage": 15.62, "elapsed_time": "0:44:15", "remaining_time": "3:59:06"} +{"current_steps": 2500, "total_steps": 15940, "loss": 0.6159, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.413089877842735e-06, "epoch": 1.57, "percentage": 15.68, "elapsed_time": "0:44:23", "remaining_time": "3:58:40"} +{"current_steps": 2510, "total_steps": 15940, "loss": 0.7341, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.408442989534536e-06, "epoch": 1.57, "percentage": 15.75, "elapsed_time": "0:44:30", "remaining_time": "3:58:10"} +{"current_steps": 2520, "total_steps": 15940, "loss": 0.6737, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.403778934056392e-06, "epoch": 1.58, "percentage": 15.81, "elapsed_time": "0:44:40", "remaining_time": "3:57:56"} +{"current_steps": 2530, "total_steps": 15940, "loss": 0.6832, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.399097729570865e-06, "epoch": 1.59, "percentage": 15.87, "elapsed_time": "0:44:46", "remaining_time": "3:57:21"} +{"current_steps": 2540, "total_steps": 15940, "loss": 0.6691, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.394399394307303e-06, "epoch": 1.59, "percentage": 15.93, "elapsed_time": "0:44:54", "remaining_time": "3:56:56"} +{"current_steps": 2550, "total_steps": 15940, "loss": 0.7072, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.38968394656176e-06, "epoch": 1.6, "percentage": 16.0, "elapsed_time": "0:45:04", "remaining_time": "3:56:38"} +{"current_steps": 2560, "total_steps": 15940, "loss": 0.7068, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.384951404696933e-06, "epoch": 1.61, "percentage": 16.06, "elapsed_time": "0:45:11", "remaining_time": "3:56:13"} +{"current_steps": 2570, "total_steps": 15940, "loss": 0.6476, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.380201787142085e-06, "epoch": 1.61, "percentage": 16.12, "elapsed_time": "0:45:17", "remaining_time": "3:55:39"} +{"current_steps": 2580, "total_steps": 15940, "loss": 0.6805, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.37543511239297e-06, "epoch": 1.62, "percentage": 16.19, "elapsed_time": "0:45:24", "remaining_time": "3:55:06"} +{"current_steps": 2590, "total_steps": 15940, "loss": 0.6887, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.370651399011769e-06, "epoch": 1.62, "percentage": 16.25, "elapsed_time": "0:45:30", "remaining_time": "3:54:34"} +{"current_steps": 2600, "total_steps": 15940, "loss": 0.6912, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.365850665627016e-06, "epoch": 1.63, "percentage": 16.31, "elapsed_time": "0:45:37", "remaining_time": "3:54:06"} +{"current_steps": 2600, "total_steps": 15940, "loss": null, "eval_loss": 0.6971801519393921, "predict_loss": null, "reward": null, "learning_rate": null, "epoch": 1.63, "percentage": 16.31, "elapsed_time": "0:45:37", "remaining_time": "3:54:06"} +{"current_steps": 2610, "total_steps": 15940, "loss": 0.6479, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.36103293093352e-06, "epoch": 1.64, "percentage": 16.37, "elapsed_time": "0:46:51", "remaining_time": "3:59:17"} +{"current_steps": 2620, "total_steps": 15940, "loss": 0.6788, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.356198213692297e-06, "epoch": 1.64, "percentage": 16.44, "elapsed_time": "0:46:58", "remaining_time": "3:58:48"} +{"current_steps": 2630, "total_steps": 15940, "loss": 0.6481, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.351346532730499e-06, "epoch": 1.65, "percentage": 16.5, "elapsed_time": "0:47:05", "remaining_time": "3:58:17"} +{"current_steps": 2640, "total_steps": 15940, "loss": 0.6893, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.346477906941331e-06, "epoch": 1.66, "percentage": 16.56, "elapsed_time": "0:47:11", "remaining_time": "3:57:45"} +{"current_steps": 2650, "total_steps": 15940, "loss": 0.6784, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.341592355283986e-06, "epoch": 1.66, "percentage": 16.62, "elapsed_time": "0:47:17", "remaining_time": "3:57:12"} +{"current_steps": 2660, "total_steps": 15940, "loss": 0.6834, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.336689896783575e-06, "epoch": 1.67, "percentage": 16.69, "elapsed_time": "0:47:24", "remaining_time": "3:56:42"} +{"current_steps": 2670, "total_steps": 15940, "loss": 0.6701, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.331770550531037e-06, "epoch": 1.68, "percentage": 16.75, "elapsed_time": "0:47:32", "remaining_time": "3:56:19"} +{"current_steps": 2680, "total_steps": 15940, "loss": 0.6691, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.32683433568308e-06, "epoch": 1.68, "percentage": 16.81, "elapsed_time": "0:47:40", "remaining_time": "3:55:51"} +{"current_steps": 2690, "total_steps": 15940, "loss": 0.6818, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.321881271462104e-06, "epoch": 1.69, "percentage": 16.88, "elapsed_time": "0:47:47", "remaining_time": "3:55:26"} +{"current_steps": 2700, "total_steps": 15940, "loss": 0.6852, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.316911377156116e-06, "epoch": 1.69, "percentage": 16.94, "elapsed_time": "0:47:55", "remaining_time": "3:55:01"} +{"current_steps": 2710, "total_steps": 15940, "loss": 0.6653, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.31192467211867e-06, "epoch": 1.7, "percentage": 17.0, "elapsed_time": "0:48:03", "remaining_time": "3:54:38"} +{"current_steps": 2720, "total_steps": 15940, "loss": 0.6671, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.306921175768776e-06, "epoch": 1.71, "percentage": 17.06, "elapsed_time": "0:48:10", "remaining_time": "3:54:08"} +{"current_steps": 2730, "total_steps": 15940, "loss": 0.7066, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.301900907590836e-06, "epoch": 1.71, "percentage": 17.13, "elapsed_time": "0:48:16", "remaining_time": "3:53:36"} +{"current_steps": 2740, "total_steps": 15940, "loss": 0.7326, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.296863887134561e-06, "epoch": 1.72, "percentage": 17.19, "elapsed_time": "0:48:23", "remaining_time": "3:53:05"} +{"current_steps": 2750, "total_steps": 15940, "loss": 0.6758, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.291810134014904e-06, "epoch": 1.73, "percentage": 17.25, "elapsed_time": "0:48:33", "remaining_time": "3:52:56"} +{"current_steps": 2760, "total_steps": 15940, "loss": 0.6645, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.286739667911973e-06, "epoch": 1.73, "percentage": 17.31, "elapsed_time": "0:48:42", "remaining_time": "3:52:36"} +{"current_steps": 2770, "total_steps": 15940, "loss": 0.6968, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.281652508570957e-06, "epoch": 1.74, "percentage": 17.38, "elapsed_time": "0:48:48", "remaining_time": "3:52:05"} +{"current_steps": 2780, "total_steps": 15940, "loss": 0.6718, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.27654867580206e-06, "epoch": 1.74, "percentage": 17.44, "elapsed_time": "0:48:55", "remaining_time": "3:51:36"} +{"current_steps": 2790, "total_steps": 15940, "loss": 0.6915, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.271428189480405e-06, "epoch": 1.75, "percentage": 17.5, "elapsed_time": "0:49:02", "remaining_time": "3:51:10"} +{"current_steps": 2800, "total_steps": 15940, "loss": 0.6821, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.266291069545972e-06, "epoch": 1.76, "percentage": 17.57, "elapsed_time": "0:49:09", "remaining_time": "3:50:43"} +{"current_steps": 2800, "total_steps": 15940, "loss": null, "eval_loss": 0.6953641176223755, "predict_loss": null, "reward": null, "learning_rate": null, "epoch": 1.76, "percentage": 17.57, "elapsed_time": "0:49:09", "remaining_time": "3:50:43"} +{"current_steps": 2810, "total_steps": 15940, "loss": 0.666, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.261137336003511e-06, "epoch": 1.76, "percentage": 17.63, "elapsed_time": "0:50:21", "remaining_time": "3:55:18"} +{"current_steps": 2820, "total_steps": 15940, "loss": 0.6414, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.255967008922475e-06, "epoch": 1.77, "percentage": 17.69, "elapsed_time": "0:50:29", "remaining_time": "3:54:56"} +{"current_steps": 2830, "total_steps": 15940, "loss": 0.7321, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.250780108436926e-06, "epoch": 1.78, "percentage": 17.75, "elapsed_time": "0:50:36", "remaining_time": "3:54:27"} +{"current_steps": 2840, "total_steps": 15940, "loss": 0.735, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.245576654745471e-06, "epoch": 1.78, "percentage": 17.82, "elapsed_time": "0:50:45", "remaining_time": "3:54:06"} +{"current_steps": 2850, "total_steps": 15940, "loss": 0.6809, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.24035666811118e-06, "epoch": 1.79, "percentage": 17.88, "elapsed_time": "0:50:52", "remaining_time": "3:53:38"} +{"current_steps": 2860, "total_steps": 15940, "loss": 0.6378, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.235120168861495e-06, "epoch": 1.79, "percentage": 17.94, "elapsed_time": "0:50:58", "remaining_time": "3:53:08"} +{"current_steps": 2870, "total_steps": 15940, "loss": 0.6648, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.229867177388172e-06, "epoch": 1.8, "percentage": 18.01, "elapsed_time": "0:51:04", "remaining_time": "3:52:38"} +{"current_steps": 2880, "total_steps": 15940, "loss": 0.6681, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.224597714147186e-06, "epoch": 1.81, "percentage": 18.07, "elapsed_time": "0:51:12", "remaining_time": "3:52:12"} +{"current_steps": 2890, "total_steps": 15940, "loss": 0.6752, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.219311799658652e-06, "epoch": 1.81, "percentage": 18.13, "elapsed_time": "0:51:20", "remaining_time": "3:51:49"} +{"current_steps": 2900, "total_steps": 15940, "loss": 0.6427, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.214009454506754e-06, "epoch": 1.82, "percentage": 18.19, "elapsed_time": "0:51:27", "remaining_time": "3:51:23"} +{"current_steps": 2910, "total_steps": 15940, "loss": 0.6763, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.208690699339656e-06, "epoch": 1.83, "percentage": 18.26, "elapsed_time": "0:51:34", "remaining_time": "3:50:57"} +{"current_steps": 2920, "total_steps": 15940, "loss": 0.6935, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.203355554869428e-06, "epoch": 1.83, "percentage": 18.32, "elapsed_time": "0:51:43", "remaining_time": "3:50:38"} +{"current_steps": 2930, "total_steps": 15940, "loss": 0.7012, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.198004041871962e-06, "epoch": 1.84, "percentage": 18.38, "elapsed_time": "0:51:51", "remaining_time": "3:50:13"} +{"current_steps": 2940, "total_steps": 15940, "loss": 0.6713, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.192636181186887e-06, "epoch": 1.84, "percentage": 18.44, "elapsed_time": "0:51:59", "remaining_time": "3:49:52"} +{"current_steps": 2950, "total_steps": 15940, "loss": 0.6344, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.1872519937175e-06, "epoch": 1.85, "percentage": 18.51, "elapsed_time": "0:52:06", "remaining_time": "3:49:28"} +{"current_steps": 2960, "total_steps": 15940, "loss": 0.6699, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.181851500430672e-06, "epoch": 1.86, "percentage": 18.57, "elapsed_time": "0:52:15", "remaining_time": "3:49:07"} +{"current_steps": 2970, "total_steps": 15940, "loss": 0.7029, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.176434722356772e-06, "epoch": 1.86, "percentage": 18.63, "elapsed_time": "0:52:22", "remaining_time": "3:48:42"} +{"current_steps": 2980, "total_steps": 15940, "loss": 0.6491, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.17100168058959e-06, "epoch": 1.87, "percentage": 18.7, "elapsed_time": "0:52:30", "remaining_time": "3:48:23"} +{"current_steps": 2990, "total_steps": 15940, "loss": 0.6722, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.165552396286236e-06, "epoch": 1.88, "percentage": 18.76, "elapsed_time": "0:52:38", "remaining_time": "3:47:59"} +{"current_steps": 3000, "total_steps": 15940, "loss": 0.7104, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.160086890667086e-06, "epoch": 1.88, "percentage": 18.82, "elapsed_time": "0:52:46", "remaining_time": "3:47:36"} +{"current_steps": 3000, "total_steps": 15940, "loss": null, "eval_loss": 0.6943792104721069, "predict_loss": null, "reward": null, "learning_rate": null, "epoch": 1.88, "percentage": 18.82, "elapsed_time": "0:52:46", "remaining_time": "3:47:36"} +{"current_steps": 3010, "total_steps": 15940, "loss": 0.7042, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.154605185015678e-06, "epoch": 1.89, "percentage": 18.88, "elapsed_time": "0:53:58", "remaining_time": "3:51:51"} +{"current_steps": 3020, "total_steps": 15940, "loss": 0.6208, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.14910730067863e-06, "epoch": 1.89, "percentage": 18.95, "elapsed_time": "0:54:04", "remaining_time": "3:51:22"} +{"current_steps": 3030, "total_steps": 15940, "loss": 0.6721, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.143593259065573e-06, "epoch": 1.9, "percentage": 19.01, "elapsed_time": "0:54:12", "remaining_time": "3:50:58"} +{"current_steps": 3040, "total_steps": 15940, "loss": 0.6328, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.138063081649052e-06, "epoch": 1.91, "percentage": 19.07, "elapsed_time": "0:54:19", "remaining_time": "3:50:31"} +{"current_steps": 3050, "total_steps": 15940, "loss": 0.6564, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.132516789964443e-06, "epoch": 1.91, "percentage": 19.13, "elapsed_time": "0:54:28", "remaining_time": "3:50:11"} +{"current_steps": 3060, "total_steps": 15940, "loss": 0.6782, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.126954405609882e-06, "epoch": 1.92, "percentage": 19.2, "elapsed_time": "0:54:35", "remaining_time": "3:49:46"} +{"current_steps": 3070, "total_steps": 15940, "loss": 0.6686, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.121375950246165e-06, "epoch": 1.93, "percentage": 19.26, "elapsed_time": "0:54:43", "remaining_time": "3:49:24"} +{"current_steps": 3080, "total_steps": 15940, "loss": 0.6445, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.115781445596676e-06, "epoch": 1.93, "percentage": 19.32, "elapsed_time": "0:54:50", "remaining_time": "3:48:58"} +{"current_steps": 3090, "total_steps": 15940, "loss": 0.6306, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.110170913447294e-06, "epoch": 1.94, "percentage": 19.39, "elapsed_time": "0:55:00", "remaining_time": "3:48:45"} +{"current_steps": 3100, "total_steps": 15940, "loss": 0.6465, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.104544375646314e-06, "epoch": 1.94, "percentage": 19.45, "elapsed_time": "0:55:07", "remaining_time": "3:48:18"} +{"current_steps": 3110, "total_steps": 15940, "loss": 0.6985, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.098901854104359e-06, "epoch": 1.95, "percentage": 19.51, "elapsed_time": "0:55:15", "remaining_time": "3:47:57"} +{"current_steps": 3120, "total_steps": 15940, "loss": 0.7272, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.09324337079429e-06, "epoch": 1.96, "percentage": 19.57, "elapsed_time": "0:55:21", "remaining_time": "3:47:27"} +{"current_steps": 3130, "total_steps": 15940, "loss": 0.6632, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.08756894775114e-06, "epoch": 1.96, "percentage": 19.64, "elapsed_time": "0:55:29", "remaining_time": "3:47:07"} +{"current_steps": 3140, "total_steps": 15940, "loss": 0.6996, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.081878607071996e-06, "epoch": 1.97, "percentage": 19.7, "elapsed_time": "0:55:36", "remaining_time": "3:46:41"} +{"current_steps": 3150, "total_steps": 15940, "loss": 0.734, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.076172370915944e-06, "epoch": 1.98, "percentage": 19.76, "elapsed_time": "0:55:43", "remaining_time": "3:46:13"} +{"current_steps": 3160, "total_steps": 15940, "loss": 0.6578, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.07045026150396e-06, "epoch": 1.98, "percentage": 19.82, "elapsed_time": "0:55:51", "remaining_time": "3:45:52"} +{"current_steps": 3170, "total_steps": 15940, "loss": 0.6527, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.064712301118842e-06, "epoch": 1.99, "percentage": 19.89, "elapsed_time": "0:56:02", "remaining_time": "3:45:45"} +{"current_steps": 3180, "total_steps": 15940, "loss": 0.6487, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.058958512105104e-06, "epoch": 1.99, "percentage": 19.95, "elapsed_time": "0:56:11", "remaining_time": "3:45:26"} +{"current_steps": 3190, "total_steps": 15940, "loss": 0.7011, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.053188916868912e-06, "epoch": 2.0, "percentage": 20.01, "elapsed_time": "0:56:18", "remaining_time": "3:45:05"} +{"current_steps": 3200, "total_steps": 15940, "loss": 0.6222, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.04740353787797e-06, "epoch": 2.01, "percentage": 20.08, "elapsed_time": "0:56:27", "remaining_time": "3:44:44"} +{"current_steps": 3200, "total_steps": 15940, "loss": null, "eval_loss": 0.693417489528656, "predict_loss": null, "reward": null, "learning_rate": null, "epoch": 2.01, "percentage": 20.08, "elapsed_time": "0:56:27", "remaining_time": "3:44:44"} +{"current_steps": 3210, "total_steps": 15940, "loss": 0.6396, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.041602397661459e-06, "epoch": 2.01, "percentage": 20.14, "elapsed_time": "0:57:40", "remaining_time": "3:48:43"} +{"current_steps": 3220, "total_steps": 15940, "loss": 0.6582, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.035785518809928e-06, "epoch": 2.02, "percentage": 20.2, "elapsed_time": "0:57:47", "remaining_time": "3:48:16"} +{"current_steps": 3230, "total_steps": 15940, "loss": 0.6517, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.029952923975217e-06, "epoch": 2.03, "percentage": 20.26, "elapsed_time": "0:57:53", "remaining_time": "3:47:48"} +{"current_steps": 3240, "total_steps": 15940, "loss": 0.6465, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.024104635870368e-06, "epoch": 2.03, "percentage": 20.33, "elapsed_time": "0:58:00", "remaining_time": "3:47:23"} +{"current_steps": 3250, "total_steps": 15940, "loss": 0.6215, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.018240677269532e-06, "epoch": 2.04, "percentage": 20.39, "elapsed_time": "0:58:06", "remaining_time": "3:46:55"} +{"current_steps": 3260, "total_steps": 15940, "loss": 0.6609, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.012361071007892e-06, "epoch": 2.05, "percentage": 20.45, "elapsed_time": "0:58:13", "remaining_time": "3:46:28"} +{"current_steps": 3270, "total_steps": 15940, "loss": 0.6608, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.00646583998155e-06, "epoch": 2.05, "percentage": 20.51, "elapsed_time": "0:58:21", "remaining_time": "3:46:07"} +{"current_steps": 3280, "total_steps": 15940, "loss": 0.6222, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.000555007147469e-06, "epoch": 2.06, "percentage": 20.58, "elapsed_time": "0:58:28", "remaining_time": "3:45:43"} +{"current_steps": 3290, "total_steps": 15940, "loss": 0.6363, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 8.994628595523358e-06, "epoch": 2.06, "percentage": 20.64, "elapsed_time": "0:58:36", "remaining_time": "3:45:19"} +{"current_steps": 3300, "total_steps": 15940, "loss": 0.6364, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 8.988686628187597e-06, "epoch": 2.07, "percentage": 20.7, "elapsed_time": "0:58:45", "remaining_time": "3:45:02"} +{"current_steps": 3310, "total_steps": 15940, "loss": 0.6211, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 8.98272912827914e-06, "epoch": 2.08, "percentage": 20.77, "elapsed_time": "0:58:51", "remaining_time": "3:44:36"} +{"current_steps": 3320, "total_steps": 15940, "loss": 0.6326, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 8.97675611899743e-06, "epoch": 2.08, "percentage": 20.83, "elapsed_time": "0:58:58", "remaining_time": "3:44:09"} +{"current_steps": 3330, "total_steps": 15940, "loss": 0.7006, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 8.970767623602299e-06, "epoch": 2.09, "percentage": 20.89, "elapsed_time": "0:59:06", "remaining_time": "3:43:48"} +{"current_steps": 3340, "total_steps": 15940, "loss": 0.6316, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 8.964763665413894e-06, "epoch": 2.1, "percentage": 20.95, "elapsed_time": "0:59:12", "remaining_time": "3:43:21"} +{"current_steps": 3350, "total_steps": 15940, "loss": 0.6318, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 8.95874426781257e-06, "epoch": 2.1, "percentage": 21.02, "elapsed_time": "0:59:19", "remaining_time": "3:42:57"} +{"current_steps": 3360, "total_steps": 15940, "loss": 0.6067, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 8.952709454238809e-06, "epoch": 2.11, "percentage": 21.08, "elapsed_time": "0:59:27", "remaining_time": "3:42:38"} +{"current_steps": 3370, "total_steps": 15940, "loss": 0.6289, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 8.946659248193122e-06, "epoch": 2.11, "percentage": 21.14, "elapsed_time": "0:59:35", "remaining_time": "3:42:18"} +{"current_steps": 3380, "total_steps": 15940, "loss": 0.6537, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 8.940593673235962e-06, "epoch": 2.12, "percentage": 21.2, "elapsed_time": "0:59:42", "remaining_time": "3:41:51"} +{"current_steps": 3390, "total_steps": 15940, "loss": 0.6986, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 8.934512752987635e-06, "epoch": 2.13, "percentage": 21.27, "elapsed_time": "0:59:50", "remaining_time": "3:41:31"} +{"current_steps": 3400, "total_steps": 15940, "loss": 0.6383, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 8.928416511128194e-06, "epoch": 2.13, "percentage": 21.33, "elapsed_time": "0:59:58", "remaining_time": "3:41:12"} +{"current_steps": 3400, "total_steps": 15940, "loss": null, "eval_loss": 0.6974382996559143, "predict_loss": null, "reward": null, "learning_rate": null, "epoch": 2.13, "percentage": 21.33, "elapsed_time": "0:59:58", "remaining_time": "3:41:12"} +{"current_steps": 3410, "total_steps": 15940, "loss": 0.6447, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 8.922304971397369e-06, "epoch": 2.14, "percentage": 21.39, "elapsed_time": "1:01:11", "remaining_time": "3:44:51"} +{"current_steps": 3420, "total_steps": 15940, "loss": 0.6441, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 8.916178157594453e-06, "epoch": 2.15, "percentage": 21.46, "elapsed_time": "1:01:18", "remaining_time": "3:44:26"} +{"current_steps": 3430, "total_steps": 15940, "loss": 0.6453, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 8.910036093578223e-06, "epoch": 2.15, "percentage": 21.52, "elapsed_time": "1:01:26", "remaining_time": "3:44:04"} +{"current_steps": 3440, "total_steps": 15940, "loss": 0.6256, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 8.90387880326684e-06, "epoch": 2.16, "percentage": 21.58, "elapsed_time": "1:01:34", "remaining_time": "3:43:45"} +{"current_steps": 3450, "total_steps": 15940, "loss": 0.6244, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 8.897706310637766e-06, "epoch": 2.16, "percentage": 21.64, "elapsed_time": "1:01:41", "remaining_time": "3:43:18"} +{"current_steps": 3460, "total_steps": 15940, "loss": 0.5975, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 8.89151863972765e-06, "epoch": 2.17, "percentage": 21.71, "elapsed_time": "1:01:47", "remaining_time": "3:42:52"} +{"current_steps": 3470, "total_steps": 15940, "loss": 0.6777, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 8.88531581463226e-06, "epoch": 2.18, "percentage": 21.77, "elapsed_time": "1:01:57", "remaining_time": "3:42:37"} +{"current_steps": 3480, "total_steps": 15940, "loss": 0.6139, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 8.879097859506371e-06, "epoch": 2.18, "percentage": 21.83, "elapsed_time": "1:02:03", "remaining_time": "3:42:12"} +{"current_steps": 3490, "total_steps": 15940, "loss": 0.5775, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 8.872864798563676e-06, "epoch": 2.19, "percentage": 21.89, "elapsed_time": "1:02:10", "remaining_time": "3:41:47"} +{"current_steps": 3500, "total_steps": 15940, "loss": 0.624, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 8.866616656076696e-06, "epoch": 2.2, "percentage": 21.96, "elapsed_time": "1:02:17", "remaining_time": "3:41:24"} +{"current_steps": 3510, "total_steps": 15940, "loss": 0.6647, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 8.860353456376679e-06, "epoch": 2.2, "percentage": 22.02, "elapsed_time": "1:02:23", "remaining_time": "3:40:58"} +{"current_steps": 3520, "total_steps": 15940, "loss": 0.7081, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 8.854075223853509e-06, "epoch": 2.21, "percentage": 22.08, "elapsed_time": "1:02:31", "remaining_time": "3:40:35"} +{"current_steps": 3530, "total_steps": 15940, "loss": 0.6974, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 8.847781982955613e-06, "epoch": 2.21, "percentage": 22.15, "elapsed_time": "1:02:38", "remaining_time": "3:40:14"} +{"current_steps": 3540, "total_steps": 15940, "loss": 0.6585, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 8.841473758189853e-06, "epoch": 2.22, "percentage": 22.21, "elapsed_time": "1:02:46", "remaining_time": "3:39:51"} +{"current_steps": 3550, "total_steps": 15940, "loss": 0.6116, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 8.835150574121455e-06, "epoch": 2.23, "percentage": 22.27, "elapsed_time": "1:02:54", "remaining_time": "3:39:33"} +{"current_steps": 3560, "total_steps": 15940, "loss": 0.6333, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 8.828812455373891e-06, "epoch": 2.23, "percentage": 22.33, "elapsed_time": "1:03:02", "remaining_time": "3:39:13"} +{"current_steps": 3570, "total_steps": 15940, "loss": 0.6015, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 8.82245942662879e-06, "epoch": 2.24, "percentage": 22.4, "elapsed_time": "1:03:09", "remaining_time": "3:38:51"} +{"current_steps": 3580, "total_steps": 15940, "loss": 0.6719, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 8.816091512625845e-06, "epoch": 2.25, "percentage": 22.46, "elapsed_time": "1:03:18", "remaining_time": "3:38:32"} +{"current_steps": 3590, "total_steps": 15940, "loss": 0.6562, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 8.80970873816271e-06, "epoch": 2.25, "percentage": 22.52, "elapsed_time": "1:03:24", "remaining_time": "3:38:08"} +{"current_steps": 3600, "total_steps": 15940, "loss": 0.6436, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 8.803311128094918e-06, "epoch": 2.26, "percentage": 22.58, "elapsed_time": "1:03:31", "remaining_time": "3:37:46"} +{"current_steps": 3600, "total_steps": 15940, "loss": null, "eval_loss": 0.6980520486831665, "predict_loss": null, "reward": null, "learning_rate": null, "epoch": 2.26, "percentage": 22.58, "elapsed_time": "1:03:31", "remaining_time": "3:37:46"} +{"current_steps": 3610, "total_steps": 15940, "loss": 0.6022, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 8.796898707335766e-06, "epoch": 2.26, "percentage": 22.65, "elapsed_time": "1:04:43", "remaining_time": "3:41:04"} +{"current_steps": 3620, "total_steps": 15940, "loss": 0.6124, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 8.790471500856229e-06, "epoch": 2.27, "percentage": 22.71, "elapsed_time": "1:04:49", "remaining_time": "3:40:38"} +{"current_steps": 3630, "total_steps": 15940, "loss": 0.6209, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 8.784029533684857e-06, "epoch": 2.28, "percentage": 22.77, "elapsed_time": "1:04:57", "remaining_time": "3:40:16"} +{"current_steps": 3640, "total_steps": 15940, "loss": 0.6179, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 8.777572830907685e-06, "epoch": 2.28, "percentage": 22.84, "elapsed_time": "1:05:05", "remaining_time": "3:39:57"} +{"current_steps": 3650, "total_steps": 15940, "loss": 0.6815, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 8.771101417668127e-06, "epoch": 2.29, "percentage": 22.9, "elapsed_time": "1:05:14", "remaining_time": "3:39:39"} +{"current_steps": 3660, "total_steps": 15940, "loss": 0.5767, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 8.764615319166885e-06, "epoch": 2.3, "percentage": 22.96, "elapsed_time": "1:05:20", "remaining_time": "3:39:14"} +{"current_steps": 3670, "total_steps": 15940, "loss": 0.6281, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 8.758114560661846e-06, "epoch": 2.3, "percentage": 23.02, "elapsed_time": "1:05:27", "remaining_time": "3:38:49"} +{"current_steps": 3680, "total_steps": 15940, "loss": 0.6368, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 8.751599167467985e-06, "epoch": 2.31, "percentage": 23.09, "elapsed_time": "1:05:34", "remaining_time": "3:38:28"} +{"current_steps": 3690, "total_steps": 15940, "loss": 0.6503, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 8.745069164957265e-06, "epoch": 2.31, "percentage": 23.15, "elapsed_time": "1:05:42", "remaining_time": "3:38:09"} +{"current_steps": 3700, "total_steps": 15940, "loss": 0.6503, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 8.738524578558547e-06, "epoch": 2.32, "percentage": 23.21, "elapsed_time": "1:05:50", "remaining_time": "3:37:50"} +{"current_steps": 3710, "total_steps": 15940, "loss": 0.6412, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 8.731965433757474e-06, "epoch": 2.33, "percentage": 23.27, "elapsed_time": "1:05:59", "remaining_time": "3:37:31"} +{"current_steps": 3720, "total_steps": 15940, "loss": 0.6283, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 8.72539175609639e-06, "epoch": 2.33, "percentage": 23.34, "elapsed_time": "1:06:06", "remaining_time": "3:37:10"} +{"current_steps": 3730, "total_steps": 15940, "loss": 0.6316, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 8.718803571174229e-06, "epoch": 2.34, "percentage": 23.4, "elapsed_time": "1:06:13", "remaining_time": "3:36:46"} +{"current_steps": 3740, "total_steps": 15940, "loss": 0.6337, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 8.712200904646417e-06, "epoch": 2.35, "percentage": 23.46, "elapsed_time": "1:06:23", "remaining_time": "3:36:32"} +{"current_steps": 3750, "total_steps": 15940, "loss": 0.6683, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 8.705583782224776e-06, "epoch": 2.35, "percentage": 23.53, "elapsed_time": "1:06:29", "remaining_time": "3:36:09"} +{"current_steps": 3760, "total_steps": 15940, "loss": 0.6538, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 8.698952229677422e-06, "epoch": 2.36, "percentage": 23.59, "elapsed_time": "1:06:36", "remaining_time": "3:35:44"} +{"current_steps": 3770, "total_steps": 15940, "loss": 0.6179, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 8.692306272828661e-06, "epoch": 2.37, "percentage": 23.65, "elapsed_time": "1:06:44", "remaining_time": "3:35:25"} +{"current_steps": 3780, "total_steps": 15940, "loss": 0.6436, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 8.685645937558896e-06, "epoch": 2.37, "percentage": 23.71, "elapsed_time": "1:06:50", "remaining_time": "3:35:01"} +{"current_steps": 3790, "total_steps": 15940, "loss": 0.6242, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 8.678971249804517e-06, "epoch": 2.38, "percentage": 23.78, "elapsed_time": "1:06:58", "remaining_time": "3:34:41"} +{"current_steps": 3800, "total_steps": 15940, "loss": 0.6444, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 8.67228223555781e-06, "epoch": 2.38, "percentage": 23.84, "elapsed_time": "1:07:04", "remaining_time": "3:34:18"} +{"current_steps": 3800, "total_steps": 15940, "loss": null, "eval_loss": 0.6968220472335815, "predict_loss": null, "reward": null, "learning_rate": null, "epoch": 2.38, "percentage": 23.84, "elapsed_time": "1:07:04", "remaining_time": "3:34:18"} +{"current_steps": 3810, "total_steps": 15940, "loss": 0.6562, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 8.665578920866844e-06, "epoch": 2.39, "percentage": 23.9, "elapsed_time": "1:08:17", "remaining_time": "3:37:25"} +{"current_steps": 3820, "total_steps": 15940, "loss": 0.6776, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 8.658861331835384e-06, "epoch": 2.4, "percentage": 23.96, "elapsed_time": "1:08:24", "remaining_time": "3:37:02"} +{"current_steps": 3830, "total_steps": 15940, "loss": 0.6245, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 8.652129494622776e-06, "epoch": 2.4, "percentage": 24.03, "elapsed_time": "1:08:33", "remaining_time": "3:36:47"} +{"current_steps": 3840, "total_steps": 15940, "loss": 0.6692, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 8.645383435443853e-06, "epoch": 2.41, "percentage": 24.09, "elapsed_time": "1:08:40", "remaining_time": "3:36:24"} +{"current_steps": 3850, "total_steps": 15940, "loss": 0.6314, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 8.638623180568829e-06, "epoch": 2.42, "percentage": 24.15, "elapsed_time": "1:08:47", "remaining_time": "3:36:00"} +{"current_steps": 3860, "total_steps": 15940, "loss": 0.6289, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 8.631848756323198e-06, "epoch": 2.42, "percentage": 24.22, "elapsed_time": "1:08:53", "remaining_time": "3:35:36"} +{"current_steps": 3870, "total_steps": 15940, "loss": 0.6367, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 8.625060189087636e-06, "epoch": 2.43, "percentage": 24.28, "elapsed_time": "1:08:59", "remaining_time": "3:35:11"} +{"current_steps": 3880, "total_steps": 15940, "loss": 0.6262, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 8.618257505297887e-06, "epoch": 2.43, "percentage": 24.34, "elapsed_time": "1:09:07", "remaining_time": "3:34:51"} +{"current_steps": 3890, "total_steps": 15940, "loss": 0.6598, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 8.611440731444673e-06, "epoch": 2.44, "percentage": 24.4, "elapsed_time": "1:09:14", "remaining_time": "3:34:28"} +{"current_steps": 3900, "total_steps": 15940, "loss": 0.6465, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 8.604609894073583e-06, "epoch": 2.45, "percentage": 24.47, "elapsed_time": "1:09:21", "remaining_time": "3:34:07"} +{"current_steps": 3910, "total_steps": 15940, "loss": 0.665, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 8.597765019784972e-06, "epoch": 2.45, "percentage": 24.53, "elapsed_time": "1:09:30", "remaining_time": "3:33:52"} +{"current_steps": 3920, "total_steps": 15940, "loss": 0.6207, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 8.590906135233854e-06, "epoch": 2.46, "percentage": 24.59, "elapsed_time": "1:09:38", "remaining_time": "3:33:33"} +{"current_steps": 3930, "total_steps": 15940, "loss": 0.6626, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 8.584033267129807e-06, "epoch": 2.47, "percentage": 24.65, "elapsed_time": "1:09:46", "remaining_time": "3:33:14"} +{"current_steps": 3940, "total_steps": 15940, "loss": 0.6141, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 8.577146442236856e-06, "epoch": 2.47, "percentage": 24.72, "elapsed_time": "1:09:54", "remaining_time": "3:32:54"} +{"current_steps": 3950, "total_steps": 15940, "loss": 0.6651, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 8.570245687373384e-06, "epoch": 2.48, "percentage": 24.78, "elapsed_time": "1:10:01", "remaining_time": "3:32:33"} +{"current_steps": 3960, "total_steps": 15940, "loss": 0.6725, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 8.563331029412013e-06, "epoch": 2.48, "percentage": 24.84, "elapsed_time": "1:10:11", "remaining_time": "3:32:19"} +{"current_steps": 3970, "total_steps": 15940, "loss": 0.6066, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 8.556402495279506e-06, "epoch": 2.49, "percentage": 24.91, "elapsed_time": "1:10:18", "remaining_time": "3:31:59"} +{"current_steps": 3980, "total_steps": 15940, "loss": 0.6752, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 8.549460111956665e-06, "epoch": 2.5, "percentage": 24.97, "elapsed_time": "1:10:26", "remaining_time": "3:31:40"} +{"current_steps": 3990, "total_steps": 15940, "loss": 0.6554, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 8.542503906478224e-06, "epoch": 2.5, "percentage": 25.03, "elapsed_time": "1:10:32", "remaining_time": "3:31:17"} +{"current_steps": 4000, "total_steps": 15940, "loss": 0.6368, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 8.535533905932739e-06, "epoch": 2.51, "percentage": 25.09, "elapsed_time": "1:10:41", "remaining_time": "3:30:59"} +{"current_steps": 4000, "total_steps": 15940, "loss": null, "eval_loss": 0.6986888647079468, "predict_loss": null, "reward": null, "learning_rate": null, "epoch": 2.51, "percentage": 25.09, "elapsed_time": "1:10:41", "remaining_time": "3:30:59"} +{"current_steps": 4000, "total_steps": 15940, "loss": null, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": null, "epoch": 2.51, "percentage": 25.09, "elapsed_time": "1:10:41", "remaining_time": "3:30:59"} +{"current_steps": 282, "total_steps": 282, "loss": null, "eval_loss": 0.6943792104721069, "predict_loss": null, "reward": null, "learning_rate": null, "epoch": 2.51, "percentage": 100.0, "elapsed_time": "1:12:59", "remaining_time": "0:00:00"} diff --git a/llama2_13b_peft/news_commentary_de/trainer_state.json b/llama2_13b_peft/news_commentary_de/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..84ae3ec860dd98076061bd5bffd033d1cfdcb1db --- /dev/null +++ b/llama2_13b_peft/news_commentary_de/trainer_state.json @@ -0,0 +1,2990 @@ +{ + "best_metric": 0.6943792104721069, + "best_model_checkpoint": "ckpt/llama2_13b_fuze30_no_sys/news_commentary_de_no_sys/checkpoint-3000", + "epoch": 2.509410288582183, + "eval_steps": 200, + "global_step": 4000, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.01, + "grad_norm": 0.5409977436065674, + "learning_rate": 5e-06, + "loss": 1.3994, + "step": 10 + }, + { + "epoch": 0.01, + "grad_norm": 0.850004255771637, + "learning_rate": 1e-05, + "loss": 1.4561, + "step": 20 + }, + { + "epoch": 0.02, + "grad_norm": 0.8501812219619751, + "learning_rate": 9.999990264607035e-06, + "loss": 1.3697, + "step": 30 + }, + { + "epoch": 0.03, + "grad_norm": 0.6338475346565247, + "learning_rate": 9.999961058466052e-06, + "loss": 1.3627, + "step": 40 + }, + { + "epoch": 0.03, + "grad_norm": 0.7430967688560486, + "learning_rate": 9.999912381690781e-06, + "loss": 1.1155, + "step": 50 + }, + { + "epoch": 0.04, + "grad_norm": 0.5487976670265198, + "learning_rate": 9.999844234470782e-06, + "loss": 0.9492, + "step": 60 + }, + { + "epoch": 0.04, + "grad_norm": 0.3653506934642792, + "learning_rate": 9.999756617071427e-06, + "loss": 0.9067, + "step": 70 + }, + { + "epoch": 0.05, + "grad_norm": 0.38920339941978455, + "learning_rate": 9.999649529833915e-06, + "loss": 0.8848, + "step": 80 + }, + { + "epoch": 0.06, + "grad_norm": 0.4155251979827881, + "learning_rate": 9.999522973175257e-06, + "loss": 0.798, + "step": 90 + }, + { + "epoch": 0.06, + "grad_norm": 0.4156494438648224, + "learning_rate": 9.999376947588288e-06, + "loss": 0.8782, + "step": 100 + }, + { + "epoch": 0.07, + "grad_norm": 0.4306489825248718, + "learning_rate": 9.99921145364165e-06, + "loss": 0.8124, + "step": 110 + }, + { + "epoch": 0.08, + "grad_norm": 0.39355072379112244, + "learning_rate": 9.999026491979809e-06, + "loss": 0.838, + "step": 120 + }, + { + "epoch": 0.08, + "grad_norm": 0.4246688783168793, + "learning_rate": 9.99882206332303e-06, + "loss": 0.8383, + "step": 130 + }, + { + "epoch": 0.09, + "grad_norm": 0.47585156559944153, + "learning_rate": 9.99859816846739e-06, + "loss": 0.8705, + "step": 140 + }, + { + "epoch": 0.09, + "grad_norm": 0.48569419980049133, + "learning_rate": 9.998354808284774e-06, + "loss": 0.7872, + "step": 150 + }, + { + "epoch": 0.1, + "grad_norm": 0.5107733011245728, + "learning_rate": 9.998091983722862e-06, + "loss": 0.789, + "step": 160 + }, + { + "epoch": 0.11, + "grad_norm": 0.5669977068901062, + "learning_rate": 9.997809695805136e-06, + "loss": 0.7749, + "step": 170 + }, + { + "epoch": 0.11, + "grad_norm": 0.49600809812545776, + "learning_rate": 9.99750794563087e-06, + "loss": 0.7935, + "step": 180 + }, + { + "epoch": 0.12, + "grad_norm": 0.45251163840293884, + "learning_rate": 9.997186734375124e-06, + "loss": 0.7817, + "step": 190 + }, + { + "epoch": 0.13, + "grad_norm": 0.46742165088653564, + "learning_rate": 9.996846063288746e-06, + "loss": 0.7429, + "step": 200 + }, + { + "epoch": 0.13, + "eval_loss": 0.7712445855140686, + "eval_runtime": 64.6782, + "eval_samples_per_second": 69.575, + "eval_steps_per_second": 4.36, + "step": 200 + }, + { + "epoch": 0.13, + "grad_norm": 0.5643576383590698, + "learning_rate": 9.996485933698364e-06, + "loss": 0.7636, + "step": 210 + }, + { + "epoch": 0.14, + "grad_norm": 0.4915783405303955, + "learning_rate": 9.996106347006378e-06, + "loss": 0.7856, + "step": 220 + }, + { + "epoch": 0.14, + "grad_norm": 0.3926757574081421, + "learning_rate": 9.99570730469096e-06, + "loss": 0.7529, + "step": 230 + }, + { + "epoch": 0.15, + "grad_norm": 0.3297576606273651, + "learning_rate": 9.995288808306041e-06, + "loss": 0.7671, + "step": 240 + }, + { + "epoch": 0.16, + "grad_norm": 0.45379459857940674, + "learning_rate": 9.994850859481312e-06, + "loss": 0.7231, + "step": 250 + }, + { + "epoch": 0.16, + "grad_norm": 0.5688673853874207, + "learning_rate": 9.994393459922219e-06, + "loss": 0.7694, + "step": 260 + }, + { + "epoch": 0.17, + "grad_norm": 0.6590914130210876, + "learning_rate": 9.993916611409941e-06, + "loss": 0.7661, + "step": 270 + }, + { + "epoch": 0.18, + "grad_norm": 0.4207383692264557, + "learning_rate": 9.993420315801406e-06, + "loss": 0.7952, + "step": 280 + }, + { + "epoch": 0.18, + "grad_norm": 0.47460174560546875, + "learning_rate": 9.992904575029265e-06, + "loss": 0.7966, + "step": 290 + }, + { + "epoch": 0.19, + "grad_norm": 0.6118924617767334, + "learning_rate": 9.992369391101895e-06, + "loss": 0.8167, + "step": 300 + }, + { + "epoch": 0.19, + "grad_norm": 0.44934767484664917, + "learning_rate": 9.991814766103386e-06, + "loss": 0.7368, + "step": 310 + }, + { + "epoch": 0.2, + "grad_norm": 0.5106733441352844, + "learning_rate": 9.991240702193532e-06, + "loss": 0.7796, + "step": 320 + }, + { + "epoch": 0.21, + "grad_norm": 0.4405980706214905, + "learning_rate": 9.99064720160783e-06, + "loss": 0.7727, + "step": 330 + }, + { + "epoch": 0.21, + "grad_norm": 0.6010485887527466, + "learning_rate": 9.990034266657468e-06, + "loss": 0.7604, + "step": 340 + }, + { + "epoch": 0.22, + "grad_norm": 0.6098916530609131, + "learning_rate": 9.989401899729307e-06, + "loss": 0.7399, + "step": 350 + }, + { + "epoch": 0.23, + "grad_norm": 0.5837363004684448, + "learning_rate": 9.988750103285883e-06, + "loss": 0.7715, + "step": 360 + }, + { + "epoch": 0.23, + "grad_norm": 0.49089643359184265, + "learning_rate": 9.988078879865396e-06, + "loss": 0.738, + "step": 370 + }, + { + "epoch": 0.24, + "grad_norm": 0.508166491985321, + "learning_rate": 9.987388232081694e-06, + "loss": 0.8025, + "step": 380 + }, + { + "epoch": 0.24, + "grad_norm": 0.6415013074874878, + "learning_rate": 9.98667816262427e-06, + "loss": 0.7561, + "step": 390 + }, + { + "epoch": 0.25, + "grad_norm": 0.5850837230682373, + "learning_rate": 9.985948674258243e-06, + "loss": 0.7549, + "step": 400 + }, + { + "epoch": 0.25, + "eval_loss": 0.743410587310791, + "eval_runtime": 64.8376, + "eval_samples_per_second": 69.404, + "eval_steps_per_second": 4.349, + "step": 400 + }, + { + "epoch": 0.26, + "grad_norm": 0.627358615398407, + "learning_rate": 9.985199769824359e-06, + "loss": 0.7694, + "step": 410 + }, + { + "epoch": 0.26, + "grad_norm": 0.7586867213249207, + "learning_rate": 9.984431452238968e-06, + "loss": 0.7353, + "step": 420 + }, + { + "epoch": 0.27, + "grad_norm": 0.5713008642196655, + "learning_rate": 9.983643724494017e-06, + "loss": 0.7299, + "step": 430 + }, + { + "epoch": 0.28, + "grad_norm": 0.5664968490600586, + "learning_rate": 9.982836589657043e-06, + "loss": 0.754, + "step": 440 + }, + { + "epoch": 0.28, + "grad_norm": 0.4575900435447693, + "learning_rate": 9.98201005087116e-06, + "loss": 0.7355, + "step": 450 + }, + { + "epoch": 0.29, + "grad_norm": 0.6498897075653076, + "learning_rate": 9.981164111355036e-06, + "loss": 0.7543, + "step": 460 + }, + { + "epoch": 0.29, + "grad_norm": 0.6509144306182861, + "learning_rate": 9.98029877440289e-06, + "loss": 0.7568, + "step": 470 + }, + { + "epoch": 0.3, + "grad_norm": 0.44653260707855225, + "learning_rate": 9.979414043384485e-06, + "loss": 0.7313, + "step": 480 + }, + { + "epoch": 0.31, + "grad_norm": 0.7275229096412659, + "learning_rate": 9.978509921745101e-06, + "loss": 0.7456, + "step": 490 + }, + { + "epoch": 0.31, + "grad_norm": 0.4918762147426605, + "learning_rate": 9.97758641300553e-06, + "loss": 0.7585, + "step": 500 + }, + { + "epoch": 0.32, + "grad_norm": 0.5181304216384888, + "learning_rate": 9.97664352076206e-06, + "loss": 0.7311, + "step": 510 + }, + { + "epoch": 0.33, + "grad_norm": 0.5354281663894653, + "learning_rate": 9.97568124868646e-06, + "loss": 0.7173, + "step": 520 + }, + { + "epoch": 0.33, + "grad_norm": 0.47694316506385803, + "learning_rate": 9.974699600525972e-06, + "loss": 0.7408, + "step": 530 + }, + { + "epoch": 0.34, + "grad_norm": 0.5888867974281311, + "learning_rate": 9.973698580103286e-06, + "loss": 0.757, + "step": 540 + }, + { + "epoch": 0.35, + "grad_norm": 0.7656754851341248, + "learning_rate": 9.972678191316533e-06, + "loss": 0.7717, + "step": 550 + }, + { + "epoch": 0.35, + "grad_norm": 0.5808092355728149, + "learning_rate": 9.971638438139266e-06, + "loss": 0.7314, + "step": 560 + }, + { + "epoch": 0.36, + "grad_norm": 0.5002965331077576, + "learning_rate": 9.97057932462045e-06, + "loss": 0.7112, + "step": 570 + }, + { + "epoch": 0.36, + "grad_norm": 0.6044530272483826, + "learning_rate": 9.96950085488444e-06, + "loss": 0.7802, + "step": 580 + }, + { + "epoch": 0.37, + "grad_norm": 0.48741769790649414, + "learning_rate": 9.968403033130963e-06, + "loss": 0.7472, + "step": 590 + }, + { + "epoch": 0.38, + "grad_norm": 0.4956966042518616, + "learning_rate": 9.967285863635112e-06, + "loss": 0.7552, + "step": 600 + }, + { + "epoch": 0.38, + "eval_loss": 0.733000636100769, + "eval_runtime": 65.6052, + "eval_samples_per_second": 68.592, + "eval_steps_per_second": 4.298, + "step": 600 + }, + { + "epoch": 0.38, + "grad_norm": 0.528469979763031, + "learning_rate": 9.966149350747321e-06, + "loss": 0.7274, + "step": 610 + }, + { + "epoch": 0.39, + "grad_norm": 0.5717535614967346, + "learning_rate": 9.964993498893349e-06, + "loss": 0.7734, + "step": 620 + }, + { + "epoch": 0.4, + "grad_norm": 0.5049377083778381, + "learning_rate": 9.963818312574265e-06, + "loss": 0.7117, + "step": 630 + }, + { + "epoch": 0.4, + "grad_norm": 0.7002434134483337, + "learning_rate": 9.962623796366428e-06, + "loss": 0.7256, + "step": 640 + }, + { + "epoch": 0.41, + "grad_norm": 0.6600221991539001, + "learning_rate": 9.961409954921472e-06, + "loss": 0.764, + "step": 650 + }, + { + "epoch": 0.41, + "grad_norm": 0.5288920402526855, + "learning_rate": 9.96017679296629e-06, + "loss": 0.7385, + "step": 660 + }, + { + "epoch": 0.42, + "grad_norm": 0.6407844424247742, + "learning_rate": 9.958924315303005e-06, + "loss": 0.7386, + "step": 670 + }, + { + "epoch": 0.43, + "grad_norm": 0.6425316333770752, + "learning_rate": 9.95765252680896e-06, + "loss": 0.7013, + "step": 680 + }, + { + "epoch": 0.43, + "grad_norm": 0.6219075918197632, + "learning_rate": 9.956361432436705e-06, + "loss": 0.7104, + "step": 690 + }, + { + "epoch": 0.44, + "grad_norm": 0.5872789621353149, + "learning_rate": 9.95505103721396e-06, + "loss": 0.6988, + "step": 700 + }, + { + "epoch": 0.45, + "grad_norm": 0.8937903642654419, + "learning_rate": 9.953721346243613e-06, + "loss": 0.7177, + "step": 710 + }, + { + "epoch": 0.45, + "grad_norm": 0.5471718311309814, + "learning_rate": 9.952372364703688e-06, + "loss": 0.6804, + "step": 720 + }, + { + "epoch": 0.46, + "grad_norm": 0.7264242172241211, + "learning_rate": 9.95100409784733e-06, + "loss": 0.7432, + "step": 730 + }, + { + "epoch": 0.46, + "grad_norm": 0.7826697826385498, + "learning_rate": 9.949616551002787e-06, + "loss": 0.7521, + "step": 740 + }, + { + "epoch": 0.47, + "grad_norm": 0.6297461986541748, + "learning_rate": 9.948209729573384e-06, + "loss": 0.7624, + "step": 750 + }, + { + "epoch": 0.48, + "grad_norm": 0.7424671053886414, + "learning_rate": 9.946783639037503e-06, + "loss": 0.7619, + "step": 760 + }, + { + "epoch": 0.48, + "grad_norm": 0.5803889632225037, + "learning_rate": 9.945338284948568e-06, + "loss": 0.7159, + "step": 770 + }, + { + "epoch": 0.49, + "grad_norm": 0.895115077495575, + "learning_rate": 9.943873672935014e-06, + "loss": 0.7621, + "step": 780 + }, + { + "epoch": 0.5, + "grad_norm": 0.5325012803077698, + "learning_rate": 9.94238980870027e-06, + "loss": 0.6923, + "step": 790 + }, + { + "epoch": 0.5, + "grad_norm": 0.7378474473953247, + "learning_rate": 9.940886698022733e-06, + "loss": 0.7265, + "step": 800 + }, + { + "epoch": 0.5, + "eval_loss": 0.7256230711936951, + "eval_runtime": 70.6462, + "eval_samples_per_second": 63.698, + "eval_steps_per_second": 3.992, + "step": 800 + }, + { + "epoch": 0.51, + "grad_norm": 0.6454309225082397, + "learning_rate": 9.93936434675576e-06, + "loss": 0.6976, + "step": 810 + }, + { + "epoch": 0.51, + "grad_norm": 0.7399590015411377, + "learning_rate": 9.93782276082762e-06, + "loss": 0.7028, + "step": 820 + }, + { + "epoch": 0.52, + "grad_norm": 0.6661127209663391, + "learning_rate": 9.936261946241492e-06, + "loss": 0.7253, + "step": 830 + }, + { + "epoch": 0.53, + "grad_norm": 0.5480040907859802, + "learning_rate": 9.934681909075434e-06, + "loss": 0.7096, + "step": 840 + }, + { + "epoch": 0.53, + "grad_norm": 0.6889688968658447, + "learning_rate": 9.93308265548236e-06, + "loss": 0.745, + "step": 850 + }, + { + "epoch": 0.54, + "grad_norm": 0.6629202961921692, + "learning_rate": 9.931464191690015e-06, + "loss": 0.7111, + "step": 860 + }, + { + "epoch": 0.55, + "grad_norm": 0.5166647434234619, + "learning_rate": 9.929826524000948e-06, + "loss": 0.7296, + "step": 870 + }, + { + "epoch": 0.55, + "grad_norm": 0.6730151772499084, + "learning_rate": 9.928169658792498e-06, + "loss": 0.7387, + "step": 880 + }, + { + "epoch": 0.56, + "grad_norm": 0.6847391724586487, + "learning_rate": 9.926493602516758e-06, + "loss": 0.7156, + "step": 890 + }, + { + "epoch": 0.56, + "grad_norm": 0.7915560007095337, + "learning_rate": 9.924798361700554e-06, + "loss": 0.7956, + "step": 900 + }, + { + "epoch": 0.57, + "grad_norm": 0.5927907824516296, + "learning_rate": 9.923083942945419e-06, + "loss": 0.7361, + "step": 910 + }, + { + "epoch": 0.58, + "grad_norm": 0.7477264404296875, + "learning_rate": 9.92135035292757e-06, + "loss": 0.7091, + "step": 920 + }, + { + "epoch": 0.58, + "grad_norm": 0.7492902278900146, + "learning_rate": 9.919597598397882e-06, + "loss": 0.6967, + "step": 930 + }, + { + "epoch": 0.59, + "grad_norm": 0.7357175350189209, + "learning_rate": 9.91782568618185e-06, + "loss": 0.7509, + "step": 940 + }, + { + "epoch": 0.6, + "grad_norm": 0.6629440188407898, + "learning_rate": 9.916034623179584e-06, + "loss": 0.6999, + "step": 950 + }, + { + "epoch": 0.6, + "grad_norm": 0.5954321026802063, + "learning_rate": 9.914224416365765e-06, + "loss": 0.7194, + "step": 960 + }, + { + "epoch": 0.61, + "grad_norm": 0.9139691591262817, + "learning_rate": 9.91239507278962e-06, + "loss": 0.705, + "step": 970 + }, + { + "epoch": 0.61, + "grad_norm": 0.9337642788887024, + "learning_rate": 9.910546599574903e-06, + "loss": 0.7314, + "step": 980 + }, + { + "epoch": 0.62, + "grad_norm": 0.6616548299789429, + "learning_rate": 9.908679003919856e-06, + "loss": 0.7549, + "step": 990 + }, + { + "epoch": 0.63, + "grad_norm": 0.6958469152450562, + "learning_rate": 9.906792293097194e-06, + "loss": 0.7524, + "step": 1000 + }, + { + "epoch": 0.63, + "eval_loss": 0.7200015187263489, + "eval_runtime": 68.2079, + "eval_samples_per_second": 65.975, + "eval_steps_per_second": 4.134, + "step": 1000 + }, + { + "epoch": 0.63, + "grad_norm": 0.5352278351783752, + "learning_rate": 9.904886474454063e-06, + "loss": 0.7218, + "step": 1010 + }, + { + "epoch": 0.64, + "grad_norm": 0.6772333979606628, + "learning_rate": 9.90296155541202e-06, + "loss": 0.7171, + "step": 1020 + }, + { + "epoch": 0.65, + "grad_norm": 0.7102545499801636, + "learning_rate": 9.901017543467005e-06, + "loss": 0.758, + "step": 1030 + }, + { + "epoch": 0.65, + "grad_norm": 0.5379916429519653, + "learning_rate": 9.899054446189305e-06, + "loss": 0.7121, + "step": 1040 + }, + { + "epoch": 0.66, + "grad_norm": 0.6267510056495667, + "learning_rate": 9.897072271223526e-06, + "loss": 0.7088, + "step": 1050 + }, + { + "epoch": 0.66, + "grad_norm": 0.9392660856246948, + "learning_rate": 9.895071026288574e-06, + "loss": 0.7804, + "step": 1060 + }, + { + "epoch": 0.67, + "grad_norm": 0.9270221590995789, + "learning_rate": 9.893050719177608e-06, + "loss": 0.6935, + "step": 1070 + }, + { + "epoch": 0.68, + "grad_norm": 0.722115159034729, + "learning_rate": 9.891011357758022e-06, + "loss": 0.6894, + "step": 1080 + }, + { + "epoch": 0.68, + "grad_norm": 0.7055147886276245, + "learning_rate": 9.888952949971411e-06, + "loss": 0.7244, + "step": 1090 + }, + { + "epoch": 0.69, + "grad_norm": 0.7774051427841187, + "learning_rate": 9.886875503833537e-06, + "loss": 0.8156, + "step": 1100 + }, + { + "epoch": 0.7, + "grad_norm": 0.7965037226676941, + "learning_rate": 9.884779027434304e-06, + "loss": 0.7478, + "step": 1110 + }, + { + "epoch": 0.7, + "grad_norm": 0.8204682469367981, + "learning_rate": 9.882663528937716e-06, + "loss": 0.7187, + "step": 1120 + }, + { + "epoch": 0.71, + "grad_norm": 0.63904869556427, + "learning_rate": 9.880529016581863e-06, + "loss": 0.7145, + "step": 1130 + }, + { + "epoch": 0.72, + "grad_norm": 0.6523028612136841, + "learning_rate": 9.878375498678869e-06, + "loss": 0.731, + "step": 1140 + }, + { + "epoch": 0.72, + "grad_norm": 0.7148768901824951, + "learning_rate": 9.876202983614868e-06, + "loss": 0.7323, + "step": 1150 + }, + { + "epoch": 0.73, + "grad_norm": 0.6108402013778687, + "learning_rate": 9.874011479849981e-06, + "loss": 0.6757, + "step": 1160 + }, + { + "epoch": 0.73, + "grad_norm": 0.6056957244873047, + "learning_rate": 9.871800995918264e-06, + "loss": 0.7258, + "step": 1170 + }, + { + "epoch": 0.74, + "grad_norm": 0.7671077847480774, + "learning_rate": 9.86957154042769e-06, + "loss": 0.7334, + "step": 1180 + }, + { + "epoch": 0.75, + "grad_norm": 0.8327913880348206, + "learning_rate": 9.867323122060108e-06, + "loss": 0.7358, + "step": 1190 + }, + { + "epoch": 0.75, + "grad_norm": 0.7025701999664307, + "learning_rate": 9.865055749571215e-06, + "loss": 0.6976, + "step": 1200 + }, + { + "epoch": 0.75, + "eval_loss": 0.7151169180870056, + "eval_runtime": 64.9708, + "eval_samples_per_second": 69.262, + "eval_steps_per_second": 4.34, + "step": 1200 + }, + { + "epoch": 0.76, + "grad_norm": 0.8391425609588623, + "learning_rate": 9.862769431790513e-06, + "loss": 0.6983, + "step": 1210 + }, + { + "epoch": 0.77, + "grad_norm": 0.7243052124977112, + "learning_rate": 9.860464177621286e-06, + "loss": 0.7171, + "step": 1220 + }, + { + "epoch": 0.77, + "grad_norm": 0.6501705050468445, + "learning_rate": 9.858139996040554e-06, + "loss": 0.7206, + "step": 1230 + }, + { + "epoch": 0.78, + "grad_norm": 0.8618900179862976, + "learning_rate": 9.855796896099044e-06, + "loss": 0.7368, + "step": 1240 + }, + { + "epoch": 0.78, + "grad_norm": 0.826347291469574, + "learning_rate": 9.85343488692116e-06, + "loss": 0.7372, + "step": 1250 + }, + { + "epoch": 0.79, + "grad_norm": 0.8590556383132935, + "learning_rate": 9.851053977704931e-06, + "loss": 0.7373, + "step": 1260 + }, + { + "epoch": 0.8, + "grad_norm": 0.8719233274459839, + "learning_rate": 9.848654177721999e-06, + "loss": 0.7608, + "step": 1270 + }, + { + "epoch": 0.8, + "grad_norm": 0.7729814052581787, + "learning_rate": 9.846235496317556e-06, + "loss": 0.7227, + "step": 1280 + }, + { + "epoch": 0.81, + "grad_norm": 0.801908016204834, + "learning_rate": 9.843797942910328e-06, + "loss": 0.7415, + "step": 1290 + }, + { + "epoch": 0.82, + "grad_norm": 0.9884589910507202, + "learning_rate": 9.841341526992536e-06, + "loss": 0.7206, + "step": 1300 + }, + { + "epoch": 0.82, + "grad_norm": 0.7067356705665588, + "learning_rate": 9.838866258129847e-06, + "loss": 0.6704, + "step": 1310 + }, + { + "epoch": 0.83, + "grad_norm": 0.7258339524269104, + "learning_rate": 9.836372145961346e-06, + "loss": 0.7159, + "step": 1320 + }, + { + "epoch": 0.83, + "grad_norm": 0.8512592315673828, + "learning_rate": 9.833859200199498e-06, + "loss": 0.6916, + "step": 1330 + }, + { + "epoch": 0.84, + "grad_norm": 0.7856159210205078, + "learning_rate": 9.83132743063011e-06, + "loss": 0.7568, + "step": 1340 + }, + { + "epoch": 0.85, + "grad_norm": 0.7149519324302673, + "learning_rate": 9.82877684711229e-06, + "loss": 0.7017, + "step": 1350 + }, + { + "epoch": 0.85, + "grad_norm": 1.0214589834213257, + "learning_rate": 9.826207459578412e-06, + "loss": 0.7127, + "step": 1360 + }, + { + "epoch": 0.86, + "grad_norm": 1.0295792818069458, + "learning_rate": 9.823619278034073e-06, + "loss": 0.7013, + "step": 1370 + }, + { + "epoch": 0.87, + "grad_norm": 0.8674212694168091, + "learning_rate": 9.821012312558059e-06, + "loss": 0.6942, + "step": 1380 + }, + { + "epoch": 0.87, + "grad_norm": 0.7604880332946777, + "learning_rate": 9.818386573302305e-06, + "loss": 0.7013, + "step": 1390 + }, + { + "epoch": 0.88, + "grad_norm": 0.7863268852233887, + "learning_rate": 9.815742070491852e-06, + "loss": 0.7408, + "step": 1400 + }, + { + "epoch": 0.88, + "eval_loss": 0.7116020917892456, + "eval_runtime": 64.7496, + "eval_samples_per_second": 69.498, + "eval_steps_per_second": 4.355, + "step": 1400 + }, + { + "epoch": 0.88, + "grad_norm": 0.7451047301292419, + "learning_rate": 9.81307881442481e-06, + "loss": 0.7105, + "step": 1410 + }, + { + "epoch": 0.89, + "grad_norm": 0.8191768527030945, + "learning_rate": 9.810396815472316e-06, + "loss": 0.6994, + "step": 1420 + }, + { + "epoch": 0.9, + "grad_norm": 0.5049307942390442, + "learning_rate": 9.807696084078494e-06, + "loss": 0.7459, + "step": 1430 + }, + { + "epoch": 0.9, + "grad_norm": 0.762649416923523, + "learning_rate": 9.804976630760419e-06, + "loss": 0.7048, + "step": 1440 + }, + { + "epoch": 0.91, + "grad_norm": 0.8065420985221863, + "learning_rate": 9.802238466108068e-06, + "loss": 0.6975, + "step": 1450 + }, + { + "epoch": 0.92, + "grad_norm": 0.899728000164032, + "learning_rate": 9.799481600784286e-06, + "loss": 0.737, + "step": 1460 + }, + { + "epoch": 0.92, + "grad_norm": 0.7029632925987244, + "learning_rate": 9.796706045524738e-06, + "loss": 0.7236, + "step": 1470 + }, + { + "epoch": 0.93, + "grad_norm": 0.7470441460609436, + "learning_rate": 9.793911811137874e-06, + "loss": 0.6984, + "step": 1480 + }, + { + "epoch": 0.93, + "grad_norm": 0.8542289137840271, + "learning_rate": 9.791098908504884e-06, + "loss": 0.8019, + "step": 1490 + }, + { + "epoch": 0.94, + "grad_norm": 0.749045193195343, + "learning_rate": 9.788267348579649e-06, + "loss": 0.7114, + "step": 1500 + }, + { + "epoch": 0.95, + "grad_norm": 0.7834633588790894, + "learning_rate": 9.78541714238871e-06, + "loss": 0.7222, + "step": 1510 + }, + { + "epoch": 0.95, + "grad_norm": 0.8488750457763672, + "learning_rate": 9.782548301031218e-06, + "loss": 0.7434, + "step": 1520 + }, + { + "epoch": 0.96, + "grad_norm": 0.7018651962280273, + "learning_rate": 9.77966083567889e-06, + "loss": 0.7193, + "step": 1530 + }, + { + "epoch": 0.97, + "grad_norm": 0.8260754346847534, + "learning_rate": 9.776754757575975e-06, + "loss": 0.7763, + "step": 1540 + }, + { + "epoch": 0.97, + "grad_norm": 0.8732118010520935, + "learning_rate": 9.773830078039193e-06, + "loss": 0.7494, + "step": 1550 + }, + { + "epoch": 0.98, + "grad_norm": 0.9026480317115784, + "learning_rate": 9.77088680845771e-06, + "loss": 0.7078, + "step": 1560 + }, + { + "epoch": 0.98, + "grad_norm": 0.7559505105018616, + "learning_rate": 9.767924960293076e-06, + "loss": 0.7468, + "step": 1570 + }, + { + "epoch": 0.99, + "grad_norm": 0.8832489848136902, + "learning_rate": 9.764944545079197e-06, + "loss": 0.7502, + "step": 1580 + }, + { + "epoch": 1.0, + "grad_norm": 0.8065813183784485, + "learning_rate": 9.761945574422276e-06, + "loss": 0.7337, + "step": 1590 + }, + { + "epoch": 1.0, + "grad_norm": 0.6966451406478882, + "learning_rate": 9.758928060000779e-06, + "loss": 0.701, + "step": 1600 + }, + { + "epoch": 1.0, + "eval_loss": 0.7084596157073975, + "eval_runtime": 64.9175, + "eval_samples_per_second": 69.319, + "eval_steps_per_second": 4.344, + "step": 1600 + }, + { + "epoch": 1.01, + "grad_norm": 0.8769924640655518, + "learning_rate": 9.755892013565377e-06, + "loss": 0.7014, + "step": 1610 + }, + { + "epoch": 1.02, + "grad_norm": 0.8940397500991821, + "learning_rate": 9.752837446938915e-06, + "loss": 0.7256, + "step": 1620 + }, + { + "epoch": 1.02, + "grad_norm": 0.7818279266357422, + "learning_rate": 9.749764372016355e-06, + "loss": 0.7268, + "step": 1630 + }, + { + "epoch": 1.03, + "grad_norm": 0.7369450330734253, + "learning_rate": 9.746672800764734e-06, + "loss": 0.6968, + "step": 1640 + }, + { + "epoch": 1.04, + "grad_norm": 0.8924703001976013, + "learning_rate": 9.743562745223118e-06, + "loss": 0.7087, + "step": 1650 + }, + { + "epoch": 1.04, + "grad_norm": 1.0398907661437988, + "learning_rate": 9.740434217502549e-06, + "loss": 0.7199, + "step": 1660 + }, + { + "epoch": 1.05, + "grad_norm": 0.7427188754081726, + "learning_rate": 9.737287229786007e-06, + "loss": 0.687, + "step": 1670 + }, + { + "epoch": 1.05, + "grad_norm": 0.9230946898460388, + "learning_rate": 9.734121794328358e-06, + "loss": 0.7003, + "step": 1680 + }, + { + "epoch": 1.06, + "grad_norm": 0.8461260795593262, + "learning_rate": 9.730937923456303e-06, + "loss": 0.7329, + "step": 1690 + }, + { + "epoch": 1.07, + "grad_norm": 0.783156156539917, + "learning_rate": 9.727735629568335e-06, + "loss": 0.6924, + "step": 1700 + }, + { + "epoch": 1.07, + "grad_norm": 0.8659111261367798, + "learning_rate": 9.724514925134696e-06, + "loss": 0.7219, + "step": 1710 + }, + { + "epoch": 1.08, + "grad_norm": 0.8218225240707397, + "learning_rate": 9.721275822697307e-06, + "loss": 0.6741, + "step": 1720 + }, + { + "epoch": 1.09, + "grad_norm": 0.8807560205459595, + "learning_rate": 9.718018334869748e-06, + "loss": 0.7047, + "step": 1730 + }, + { + "epoch": 1.09, + "grad_norm": 0.9925751090049744, + "learning_rate": 9.714742474337187e-06, + "loss": 0.7156, + "step": 1740 + }, + { + "epoch": 1.1, + "grad_norm": 0.7675251364707947, + "learning_rate": 9.711448253856336e-06, + "loss": 0.6887, + "step": 1750 + }, + { + "epoch": 1.1, + "grad_norm": 0.783015251159668, + "learning_rate": 9.708135686255415e-06, + "loss": 0.7373, + "step": 1760 + }, + { + "epoch": 1.11, + "grad_norm": 0.8704028129577637, + "learning_rate": 9.704804784434077e-06, + "loss": 0.6652, + "step": 1770 + }, + { + "epoch": 1.12, + "grad_norm": 0.9532449245452881, + "learning_rate": 9.701455561363378e-06, + "loss": 0.682, + "step": 1780 + }, + { + "epoch": 1.12, + "grad_norm": 0.9703534245491028, + "learning_rate": 9.698088030085721e-06, + "loss": 0.6844, + "step": 1790 + }, + { + "epoch": 1.13, + "grad_norm": 1.031153678894043, + "learning_rate": 9.694702203714801e-06, + "loss": 0.7084, + "step": 1800 + }, + { + "epoch": 1.13, + "eval_loss": 0.705936074256897, + "eval_runtime": 64.9167, + "eval_samples_per_second": 69.32, + "eval_steps_per_second": 4.344, + "step": 1800 + }, + { + "epoch": 1.14, + "grad_norm": 0.8839524388313293, + "learning_rate": 9.691298095435559e-06, + "loss": 0.6897, + "step": 1810 + }, + { + "epoch": 1.14, + "grad_norm": 1.0173550844192505, + "learning_rate": 9.687875718504126e-06, + "loss": 0.6851, + "step": 1820 + }, + { + "epoch": 1.15, + "grad_norm": 1.0902131795883179, + "learning_rate": 9.684435086247777e-06, + "loss": 0.7132, + "step": 1830 + }, + { + "epoch": 1.15, + "grad_norm": 0.8699798583984375, + "learning_rate": 9.680976212064875e-06, + "loss": 0.7129, + "step": 1840 + }, + { + "epoch": 1.16, + "grad_norm": 0.879970133304596, + "learning_rate": 9.677499109424818e-06, + "loss": 0.6907, + "step": 1850 + }, + { + "epoch": 1.17, + "grad_norm": 0.9659926295280457, + "learning_rate": 9.674003791867993e-06, + "loss": 0.7327, + "step": 1860 + }, + { + "epoch": 1.17, + "grad_norm": 1.0900288820266724, + "learning_rate": 9.670490273005713e-06, + "loss": 0.7304, + "step": 1870 + }, + { + "epoch": 1.18, + "grad_norm": 0.995785117149353, + "learning_rate": 9.666958566520175e-06, + "loss": 0.7076, + "step": 1880 + }, + { + "epoch": 1.19, + "grad_norm": 1.0170907974243164, + "learning_rate": 9.663408686164399e-06, + "loss": 0.691, + "step": 1890 + }, + { + "epoch": 1.19, + "grad_norm": 1.1418849229812622, + "learning_rate": 9.659840645762176e-06, + "loss": 0.74, + "step": 1900 + }, + { + "epoch": 1.2, + "grad_norm": 0.7200061082839966, + "learning_rate": 9.656254459208015e-06, + "loss": 0.7295, + "step": 1910 + }, + { + "epoch": 1.2, + "grad_norm": 0.9135183095932007, + "learning_rate": 9.652650140467094e-06, + "loss": 0.651, + "step": 1920 + }, + { + "epoch": 1.21, + "grad_norm": 0.9724289774894714, + "learning_rate": 9.649027703575193e-06, + "loss": 0.7028, + "step": 1930 + }, + { + "epoch": 1.22, + "grad_norm": 0.8180338740348816, + "learning_rate": 9.645387162638652e-06, + "loss": 0.7179, + "step": 1940 + }, + { + "epoch": 1.22, + "grad_norm": 1.089158296585083, + "learning_rate": 9.641728531834313e-06, + "loss": 0.6872, + "step": 1950 + }, + { + "epoch": 1.23, + "grad_norm": 1.0048317909240723, + "learning_rate": 9.638051825409454e-06, + "loss": 0.6991, + "step": 1960 + }, + { + "epoch": 1.24, + "grad_norm": 1.1580454111099243, + "learning_rate": 9.634357057681749e-06, + "loss": 0.7183, + "step": 1970 + }, + { + "epoch": 1.24, + "grad_norm": 1.0045746564865112, + "learning_rate": 9.630644243039207e-06, + "loss": 0.6795, + "step": 1980 + }, + { + "epoch": 1.25, + "grad_norm": 0.9629393815994263, + "learning_rate": 9.62691339594011e-06, + "loss": 0.7075, + "step": 1990 + }, + { + "epoch": 1.25, + "grad_norm": 0.946081280708313, + "learning_rate": 9.623164530912963e-06, + "loss": 0.6999, + "step": 2000 + }, + { + "epoch": 1.25, + "eval_loss": 0.7040402293205261, + "eval_runtime": 64.9289, + "eval_samples_per_second": 69.307, + "eval_steps_per_second": 4.343, + "step": 2000 + }, + { + "epoch": 1.26, + "grad_norm": 1.0208806991577148, + "learning_rate": 9.619397662556434e-06, + "loss": 0.6947, + "step": 2010 + }, + { + "epoch": 1.27, + "grad_norm": 1.3248392343521118, + "learning_rate": 9.615612805539305e-06, + "loss": 0.7102, + "step": 2020 + }, + { + "epoch": 1.27, + "grad_norm": 0.9521629810333252, + "learning_rate": 9.6118099746004e-06, + "loss": 0.7068, + "step": 2030 + }, + { + "epoch": 1.28, + "grad_norm": 1.129441738128662, + "learning_rate": 9.607989184548544e-06, + "loss": 0.6528, + "step": 2040 + }, + { + "epoch": 1.29, + "grad_norm": 1.2303441762924194, + "learning_rate": 9.604150450262488e-06, + "loss": 0.6838, + "step": 2050 + }, + { + "epoch": 1.29, + "grad_norm": 1.433111310005188, + "learning_rate": 9.600293786690873e-06, + "loss": 0.6908, + "step": 2060 + }, + { + "epoch": 1.3, + "grad_norm": 1.11778724193573, + "learning_rate": 9.596419208852152e-06, + "loss": 0.7153, + "step": 2070 + }, + { + "epoch": 1.3, + "grad_norm": 1.3464716672897339, + "learning_rate": 9.592526731834536e-06, + "loss": 0.67, + "step": 2080 + }, + { + "epoch": 1.31, + "grad_norm": 1.0811423063278198, + "learning_rate": 9.588616370795947e-06, + "loss": 0.705, + "step": 2090 + }, + { + "epoch": 1.32, + "grad_norm": 1.2497215270996094, + "learning_rate": 9.584688140963945e-06, + "loss": 0.7037, + "step": 2100 + }, + { + "epoch": 1.32, + "grad_norm": 1.0369244813919067, + "learning_rate": 9.580742057635672e-06, + "loss": 0.7199, + "step": 2110 + }, + { + "epoch": 1.33, + "grad_norm": 0.8846107125282288, + "learning_rate": 9.576778136177798e-06, + "loss": 0.7098, + "step": 2120 + }, + { + "epoch": 1.34, + "grad_norm": 1.2424838542938232, + "learning_rate": 9.572796392026455e-06, + "loss": 0.7109, + "step": 2130 + }, + { + "epoch": 1.34, + "grad_norm": 1.1349953413009644, + "learning_rate": 9.568796840687184e-06, + "loss": 0.693, + "step": 2140 + }, + { + "epoch": 1.35, + "grad_norm": 0.9105272889137268, + "learning_rate": 9.564779497734864e-06, + "loss": 0.6679, + "step": 2150 + }, + { + "epoch": 1.36, + "grad_norm": 1.021628737449646, + "learning_rate": 9.56074437881366e-06, + "loss": 0.6573, + "step": 2160 + }, + { + "epoch": 1.36, + "grad_norm": 1.1030464172363281, + "learning_rate": 9.55669149963696e-06, + "loss": 0.705, + "step": 2170 + }, + { + "epoch": 1.37, + "grad_norm": 1.1582733392715454, + "learning_rate": 9.552620875987312e-06, + "loss": 0.6932, + "step": 2180 + }, + { + "epoch": 1.37, + "grad_norm": 1.2710620164871216, + "learning_rate": 9.548532523716366e-06, + "loss": 0.6616, + "step": 2190 + }, + { + "epoch": 1.38, + "grad_norm": 1.1528280973434448, + "learning_rate": 9.544426458744805e-06, + "loss": 0.7182, + "step": 2200 + }, + { + "epoch": 1.38, + "eval_loss": 0.7022137641906738, + "eval_runtime": 64.965, + "eval_samples_per_second": 69.268, + "eval_steps_per_second": 4.341, + "step": 2200 + }, + { + "epoch": 1.39, + "grad_norm": 1.1455330848693848, + "learning_rate": 9.540302697062294e-06, + "loss": 0.6878, + "step": 2210 + }, + { + "epoch": 1.39, + "grad_norm": 1.4521374702453613, + "learning_rate": 9.536161254727407e-06, + "loss": 0.6979, + "step": 2220 + }, + { + "epoch": 1.4, + "grad_norm": 1.4062340259552002, + "learning_rate": 9.532002147867575e-06, + "loss": 0.6749, + "step": 2230 + }, + { + "epoch": 1.41, + "grad_norm": 1.0267623662948608, + "learning_rate": 9.527825392679012e-06, + "loss": 0.6987, + "step": 2240 + }, + { + "epoch": 1.41, + "grad_norm": 1.0981144905090332, + "learning_rate": 9.523631005426658e-06, + "loss": 0.6888, + "step": 2250 + }, + { + "epoch": 1.42, + "grad_norm": 1.0353021621704102, + "learning_rate": 9.51941900244412e-06, + "loss": 0.6471, + "step": 2260 + }, + { + "epoch": 1.42, + "grad_norm": 1.1088558435440063, + "learning_rate": 9.515189400133594e-06, + "loss": 0.6689, + "step": 2270 + }, + { + "epoch": 1.43, + "grad_norm": 1.1822565793991089, + "learning_rate": 9.510942214965819e-06, + "loss": 0.7001, + "step": 2280 + }, + { + "epoch": 1.44, + "grad_norm": 1.2247307300567627, + "learning_rate": 9.506677463480003e-06, + "loss": 0.6999, + "step": 2290 + }, + { + "epoch": 1.44, + "grad_norm": 1.163528323173523, + "learning_rate": 9.50239516228376e-06, + "loss": 0.7008, + "step": 2300 + }, + { + "epoch": 1.45, + "grad_norm": 1.2677900791168213, + "learning_rate": 9.49809532805304e-06, + "loss": 0.7122, + "step": 2310 + }, + { + "epoch": 1.46, + "grad_norm": 1.1475526094436646, + "learning_rate": 9.493777977532072e-06, + "loss": 0.7106, + "step": 2320 + }, + { + "epoch": 1.46, + "grad_norm": 1.1459851264953613, + "learning_rate": 9.489443127533304e-06, + "loss": 0.6739, + "step": 2330 + }, + { + "epoch": 1.47, + "grad_norm": 1.2973495721817017, + "learning_rate": 9.485090794937319e-06, + "loss": 0.6888, + "step": 2340 + }, + { + "epoch": 1.47, + "grad_norm": 1.0322624444961548, + "learning_rate": 9.480720996692783e-06, + "loss": 0.6986, + "step": 2350 + }, + { + "epoch": 1.48, + "grad_norm": 1.407605767250061, + "learning_rate": 9.476333749816382e-06, + "loss": 0.7314, + "step": 2360 + }, + { + "epoch": 1.49, + "grad_norm": 1.1082048416137695, + "learning_rate": 9.47192907139274e-06, + "loss": 0.6602, + "step": 2370 + }, + { + "epoch": 1.49, + "grad_norm": 1.02568519115448, + "learning_rate": 9.46750697857437e-06, + "loss": 0.6454, + "step": 2380 + }, + { + "epoch": 1.5, + "grad_norm": 1.12267005443573, + "learning_rate": 9.463067488581598e-06, + "loss": 0.6499, + "step": 2390 + }, + { + "epoch": 1.51, + "grad_norm": 1.0023943185806274, + "learning_rate": 9.45861061870249e-06, + "loss": 0.7267, + "step": 2400 + }, + { + "epoch": 1.51, + "eval_loss": 0.6993948817253113, + "eval_runtime": 64.9272, + "eval_samples_per_second": 69.308, + "eval_steps_per_second": 4.343, + "step": 2400 + }, + { + "epoch": 1.51, + "grad_norm": 1.2597460746765137, + "learning_rate": 9.454136386292804e-06, + "loss": 0.6934, + "step": 2410 + }, + { + "epoch": 1.52, + "grad_norm": 1.293137788772583, + "learning_rate": 9.449644808775902e-06, + "loss": 0.7095, + "step": 2420 + }, + { + "epoch": 1.52, + "grad_norm": 1.0400352478027344, + "learning_rate": 9.445135903642693e-06, + "loss": 0.6626, + "step": 2430 + }, + { + "epoch": 1.53, + "grad_norm": 1.0873581171035767, + "learning_rate": 9.440609688451561e-06, + "loss": 0.6513, + "step": 2440 + }, + { + "epoch": 1.54, + "grad_norm": 1.0420424938201904, + "learning_rate": 9.4360661808283e-06, + "loss": 0.711, + "step": 2450 + }, + { + "epoch": 1.54, + "grad_norm": 1.3502894639968872, + "learning_rate": 9.431505398466045e-06, + "loss": 0.6991, + "step": 2460 + }, + { + "epoch": 1.55, + "grad_norm": 1.3502726554870605, + "learning_rate": 9.426927359125195e-06, + "loss": 0.7073, + "step": 2470 + }, + { + "epoch": 1.56, + "grad_norm": 1.2768748998641968, + "learning_rate": 9.422332080633361e-06, + "loss": 0.6557, + "step": 2480 + }, + { + "epoch": 1.56, + "grad_norm": 1.1925798654556274, + "learning_rate": 9.417719580885275e-06, + "loss": 0.6786, + "step": 2490 + }, + { + "epoch": 1.57, + "grad_norm": 0.9290177822113037, + "learning_rate": 9.413089877842735e-06, + "loss": 0.6159, + "step": 2500 + }, + { + "epoch": 1.57, + "grad_norm": 1.3553310632705688, + "learning_rate": 9.408442989534536e-06, + "loss": 0.7341, + "step": 2510 + }, + { + "epoch": 1.58, + "grad_norm": 0.9777106642723083, + "learning_rate": 9.403778934056392e-06, + "loss": 0.6737, + "step": 2520 + }, + { + "epoch": 1.59, + "grad_norm": 1.47153902053833, + "learning_rate": 9.399097729570865e-06, + "loss": 0.6832, + "step": 2530 + }, + { + "epoch": 1.59, + "grad_norm": 1.2370259761810303, + "learning_rate": 9.394399394307303e-06, + "loss": 0.6691, + "step": 2540 + }, + { + "epoch": 1.6, + "grad_norm": 1.2009457349777222, + "learning_rate": 9.38968394656176e-06, + "loss": 0.7072, + "step": 2550 + }, + { + "epoch": 1.61, + "grad_norm": 1.095410704612732, + "learning_rate": 9.384951404696933e-06, + "loss": 0.7068, + "step": 2560 + }, + { + "epoch": 1.61, + "grad_norm": 1.0805617570877075, + "learning_rate": 9.380201787142085e-06, + "loss": 0.6476, + "step": 2570 + }, + { + "epoch": 1.62, + "grad_norm": 1.3433113098144531, + "learning_rate": 9.37543511239297e-06, + "loss": 0.6805, + "step": 2580 + }, + { + "epoch": 1.62, + "grad_norm": 1.3151830434799194, + "learning_rate": 9.370651399011769e-06, + "loss": 0.6887, + "step": 2590 + }, + { + "epoch": 1.63, + "grad_norm": 1.3367606401443481, + "learning_rate": 9.365850665627016e-06, + "loss": 0.6912, + "step": 2600 + }, + { + "epoch": 1.63, + "eval_loss": 0.6971801519393921, + "eval_runtime": 65.0021, + "eval_samples_per_second": 69.229, + "eval_steps_per_second": 4.338, + "step": 2600 + }, + { + "epoch": 1.64, + "grad_norm": 1.3351305723190308, + "learning_rate": 9.36103293093352e-06, + "loss": 0.6479, + "step": 2610 + }, + { + "epoch": 1.64, + "grad_norm": 1.3986787796020508, + "learning_rate": 9.356198213692297e-06, + "loss": 0.6788, + "step": 2620 + }, + { + "epoch": 1.65, + "grad_norm": 1.0550477504730225, + "learning_rate": 9.351346532730499e-06, + "loss": 0.6481, + "step": 2630 + }, + { + "epoch": 1.66, + "grad_norm": 1.275985836982727, + "learning_rate": 9.346477906941331e-06, + "loss": 0.6893, + "step": 2640 + }, + { + "epoch": 1.66, + "grad_norm": 1.441588044166565, + "learning_rate": 9.341592355283986e-06, + "loss": 0.6784, + "step": 2650 + }, + { + "epoch": 1.67, + "grad_norm": 1.2504793405532837, + "learning_rate": 9.336689896783575e-06, + "loss": 0.6834, + "step": 2660 + }, + { + "epoch": 1.68, + "grad_norm": 1.2592806816101074, + "learning_rate": 9.331770550531037e-06, + "loss": 0.6701, + "step": 2670 + }, + { + "epoch": 1.68, + "grad_norm": 1.494611382484436, + "learning_rate": 9.32683433568308e-06, + "loss": 0.6691, + "step": 2680 + }, + { + "epoch": 1.69, + "grad_norm": 1.2938275337219238, + "learning_rate": 9.321881271462104e-06, + "loss": 0.6818, + "step": 2690 + }, + { + "epoch": 1.69, + "grad_norm": 1.5548397302627563, + "learning_rate": 9.316911377156116e-06, + "loss": 0.6852, + "step": 2700 + }, + { + "epoch": 1.7, + "grad_norm": 1.2488983869552612, + "learning_rate": 9.31192467211867e-06, + "loss": 0.6653, + "step": 2710 + }, + { + "epoch": 1.71, + "grad_norm": 1.3493934869766235, + "learning_rate": 9.306921175768776e-06, + "loss": 0.6671, + "step": 2720 + }, + { + "epoch": 1.71, + "grad_norm": 1.388487696647644, + "learning_rate": 9.301900907590836e-06, + "loss": 0.7066, + "step": 2730 + }, + { + "epoch": 1.72, + "grad_norm": 1.2521592378616333, + "learning_rate": 9.296863887134561e-06, + "loss": 0.7326, + "step": 2740 + }, + { + "epoch": 1.73, + "grad_norm": 1.0157365798950195, + "learning_rate": 9.291810134014904e-06, + "loss": 0.6758, + "step": 2750 + }, + { + "epoch": 1.73, + "grad_norm": 1.0712261199951172, + "learning_rate": 9.286739667911973e-06, + "loss": 0.6645, + "step": 2760 + }, + { + "epoch": 1.74, + "grad_norm": 1.5114517211914062, + "learning_rate": 9.281652508570957e-06, + "loss": 0.6968, + "step": 2770 + }, + { + "epoch": 1.74, + "grad_norm": 1.3408139944076538, + "learning_rate": 9.27654867580206e-06, + "loss": 0.6718, + "step": 2780 + }, + { + "epoch": 1.75, + "grad_norm": 1.3024680614471436, + "learning_rate": 9.271428189480405e-06, + "loss": 0.6915, + "step": 2790 + }, + { + "epoch": 1.76, + "grad_norm": 1.3444178104400635, + "learning_rate": 9.266291069545972e-06, + "loss": 0.6821, + "step": 2800 + }, + { + "epoch": 1.76, + "eval_loss": 0.6953641176223755, + "eval_runtime": 65.0162, + "eval_samples_per_second": 69.213, + "eval_steps_per_second": 4.337, + "step": 2800 + }, + { + "epoch": 1.76, + "grad_norm": 1.5429843664169312, + "learning_rate": 9.261137336003511e-06, + "loss": 0.666, + "step": 2810 + }, + { + "epoch": 1.77, + "grad_norm": 1.143649697303772, + "learning_rate": 9.255967008922475e-06, + "loss": 0.6414, + "step": 2820 + }, + { + "epoch": 1.78, + "grad_norm": 1.2989628314971924, + "learning_rate": 9.250780108436926e-06, + "loss": 0.7321, + "step": 2830 + }, + { + "epoch": 1.78, + "grad_norm": 1.4191828966140747, + "learning_rate": 9.245576654745471e-06, + "loss": 0.735, + "step": 2840 + }, + { + "epoch": 1.79, + "grad_norm": 1.5203850269317627, + "learning_rate": 9.24035666811118e-06, + "loss": 0.6809, + "step": 2850 + }, + { + "epoch": 1.79, + "grad_norm": 1.3680098056793213, + "learning_rate": 9.235120168861495e-06, + "loss": 0.6378, + "step": 2860 + }, + { + "epoch": 1.8, + "grad_norm": 1.346763014793396, + "learning_rate": 9.229867177388172e-06, + "loss": 0.6648, + "step": 2870 + }, + { + "epoch": 1.81, + "grad_norm": 1.0098768472671509, + "learning_rate": 9.224597714147186e-06, + "loss": 0.6681, + "step": 2880 + }, + { + "epoch": 1.81, + "grad_norm": 1.3174008131027222, + "learning_rate": 9.219311799658652e-06, + "loss": 0.6752, + "step": 2890 + }, + { + "epoch": 1.82, + "grad_norm": 1.692084789276123, + "learning_rate": 9.214009454506754e-06, + "loss": 0.6427, + "step": 2900 + }, + { + "epoch": 1.83, + "grad_norm": 1.3471505641937256, + "learning_rate": 9.208690699339656e-06, + "loss": 0.6763, + "step": 2910 + }, + { + "epoch": 1.83, + "grad_norm": 1.202491283416748, + "learning_rate": 9.203355554869428e-06, + "loss": 0.6935, + "step": 2920 + }, + { + "epoch": 1.84, + "grad_norm": 1.7211599349975586, + "learning_rate": 9.198004041871962e-06, + "loss": 0.7012, + "step": 2930 + }, + { + "epoch": 1.84, + "grad_norm": 1.336504578590393, + "learning_rate": 9.192636181186887e-06, + "loss": 0.6713, + "step": 2940 + }, + { + "epoch": 1.85, + "grad_norm": 1.2259244918823242, + "learning_rate": 9.1872519937175e-06, + "loss": 0.6344, + "step": 2950 + }, + { + "epoch": 1.86, + "grad_norm": 1.3948123455047607, + "learning_rate": 9.181851500430672e-06, + "loss": 0.6699, + "step": 2960 + }, + { + "epoch": 1.86, + "grad_norm": 1.2859784364700317, + "learning_rate": 9.176434722356772e-06, + "loss": 0.7029, + "step": 2970 + }, + { + "epoch": 1.87, + "grad_norm": 1.1549146175384521, + "learning_rate": 9.17100168058959e-06, + "loss": 0.6491, + "step": 2980 + }, + { + "epoch": 1.88, + "grad_norm": 1.575208306312561, + "learning_rate": 9.165552396286236e-06, + "loss": 0.6722, + "step": 2990 + }, + { + "epoch": 1.88, + "grad_norm": 1.6159918308258057, + "learning_rate": 9.160086890667086e-06, + "loss": 0.7104, + "step": 3000 + }, + { + "epoch": 1.88, + "eval_loss": 0.6943792104721069, + "eval_runtime": 65.034, + "eval_samples_per_second": 69.195, + "eval_steps_per_second": 4.336, + "step": 3000 + }, + { + "epoch": 1.89, + "grad_norm": 1.247308611869812, + "learning_rate": 9.154605185015678e-06, + "loss": 0.7042, + "step": 3010 + }, + { + "epoch": 1.89, + "grad_norm": 1.540644884109497, + "learning_rate": 9.14910730067863e-06, + "loss": 0.6208, + "step": 3020 + }, + { + "epoch": 1.9, + "grad_norm": 1.4479825496673584, + "learning_rate": 9.143593259065573e-06, + "loss": 0.6721, + "step": 3030 + }, + { + "epoch": 1.91, + "grad_norm": 1.5486655235290527, + "learning_rate": 9.138063081649052e-06, + "loss": 0.6328, + "step": 3040 + }, + { + "epoch": 1.91, + "grad_norm": 1.0703155994415283, + "learning_rate": 9.132516789964443e-06, + "loss": 0.6564, + "step": 3050 + }, + { + "epoch": 1.92, + "grad_norm": 1.2725510597229004, + "learning_rate": 9.126954405609882e-06, + "loss": 0.6782, + "step": 3060 + }, + { + "epoch": 1.93, + "grad_norm": 1.328399419784546, + "learning_rate": 9.121375950246165e-06, + "loss": 0.6686, + "step": 3070 + }, + { + "epoch": 1.93, + "grad_norm": 1.2014747858047485, + "learning_rate": 9.115781445596676e-06, + "loss": 0.6445, + "step": 3080 + }, + { + "epoch": 1.94, + "grad_norm": 1.3578124046325684, + "learning_rate": 9.110170913447294e-06, + "loss": 0.6306, + "step": 3090 + }, + { + "epoch": 1.94, + "grad_norm": 1.3624286651611328, + "learning_rate": 9.104544375646314e-06, + "loss": 0.6465, + "step": 3100 + }, + { + "epoch": 1.95, + "grad_norm": 1.709974765777588, + "learning_rate": 9.098901854104359e-06, + "loss": 0.6985, + "step": 3110 + }, + { + "epoch": 1.96, + "grad_norm": 1.3302754163742065, + "learning_rate": 9.09324337079429e-06, + "loss": 0.7272, + "step": 3120 + }, + { + "epoch": 1.96, + "grad_norm": 1.2946594953536987, + "learning_rate": 9.08756894775114e-06, + "loss": 0.6632, + "step": 3130 + }, + { + "epoch": 1.97, + "grad_norm": 1.2699226140975952, + "learning_rate": 9.081878607071996e-06, + "loss": 0.6996, + "step": 3140 + }, + { + "epoch": 1.98, + "grad_norm": 1.4561275243759155, + "learning_rate": 9.076172370915944e-06, + "loss": 0.734, + "step": 3150 + }, + { + "epoch": 1.98, + "grad_norm": 1.4393534660339355, + "learning_rate": 9.07045026150396e-06, + "loss": 0.6578, + "step": 3160 + }, + { + "epoch": 1.99, + "grad_norm": 1.4745630025863647, + "learning_rate": 9.064712301118842e-06, + "loss": 0.6527, + "step": 3170 + }, + { + "epoch": 1.99, + "grad_norm": 1.1444178819656372, + "learning_rate": 9.058958512105104e-06, + "loss": 0.6487, + "step": 3180 + }, + { + "epoch": 2.0, + "grad_norm": 1.433406114578247, + "learning_rate": 9.053188916868912e-06, + "loss": 0.7011, + "step": 3190 + }, + { + "epoch": 2.01, + "grad_norm": 1.218345046043396, + "learning_rate": 9.04740353787797e-06, + "loss": 0.6222, + "step": 3200 + }, + { + "epoch": 2.01, + "eval_loss": 0.693417489528656, + "eval_runtime": 65.021, + "eval_samples_per_second": 69.208, + "eval_steps_per_second": 4.337, + "step": 3200 + }, + { + "epoch": 2.01, + "grad_norm": 1.5473078489303589, + "learning_rate": 9.041602397661459e-06, + "loss": 0.6396, + "step": 3210 + }, + { + "epoch": 2.02, + "grad_norm": 1.3116644620895386, + "learning_rate": 9.035785518809928e-06, + "loss": 0.6582, + "step": 3220 + }, + { + "epoch": 2.03, + "grad_norm": 1.7744321823120117, + "learning_rate": 9.029952923975217e-06, + "loss": 0.6517, + "step": 3230 + }, + { + "epoch": 2.03, + "grad_norm": 1.5516449213027954, + "learning_rate": 9.024104635870368e-06, + "loss": 0.6465, + "step": 3240 + }, + { + "epoch": 2.04, + "grad_norm": 1.4612600803375244, + "learning_rate": 9.018240677269532e-06, + "loss": 0.6215, + "step": 3250 + }, + { + "epoch": 2.05, + "grad_norm": 1.7983644008636475, + "learning_rate": 9.012361071007892e-06, + "loss": 0.6609, + "step": 3260 + }, + { + "epoch": 2.05, + "grad_norm": 1.6382901668548584, + "learning_rate": 9.00646583998155e-06, + "loss": 0.6608, + "step": 3270 + }, + { + "epoch": 2.06, + "grad_norm": 1.6763097047805786, + "learning_rate": 9.000555007147469e-06, + "loss": 0.6222, + "step": 3280 + }, + { + "epoch": 2.06, + "grad_norm": 1.3221015930175781, + "learning_rate": 8.994628595523358e-06, + "loss": 0.6363, + "step": 3290 + }, + { + "epoch": 2.07, + "grad_norm": 1.5837445259094238, + "learning_rate": 8.988686628187597e-06, + "loss": 0.6364, + "step": 3300 + }, + { + "epoch": 2.08, + "grad_norm": 1.4271923303604126, + "learning_rate": 8.98272912827914e-06, + "loss": 0.6211, + "step": 3310 + }, + { + "epoch": 2.08, + "grad_norm": 1.599827527999878, + "learning_rate": 8.97675611899743e-06, + "loss": 0.6326, + "step": 3320 + }, + { + "epoch": 2.09, + "grad_norm": 1.6661384105682373, + "learning_rate": 8.970767623602299e-06, + "loss": 0.7006, + "step": 3330 + }, + { + "epoch": 2.1, + "grad_norm": 1.9200857877731323, + "learning_rate": 8.964763665413894e-06, + "loss": 0.6316, + "step": 3340 + }, + { + "epoch": 2.1, + "grad_norm": 1.8148436546325684, + "learning_rate": 8.95874426781257e-06, + "loss": 0.6318, + "step": 3350 + }, + { + "epoch": 2.11, + "grad_norm": 1.3358807563781738, + "learning_rate": 8.952709454238809e-06, + "loss": 0.6067, + "step": 3360 + }, + { + "epoch": 2.11, + "grad_norm": 1.8055490255355835, + "learning_rate": 8.946659248193122e-06, + "loss": 0.6289, + "step": 3370 + }, + { + "epoch": 2.12, + "grad_norm": 1.4589310884475708, + "learning_rate": 8.940593673235962e-06, + "loss": 0.6537, + "step": 3380 + }, + { + "epoch": 2.13, + "grad_norm": 1.903086543083191, + "learning_rate": 8.934512752987635e-06, + "loss": 0.6986, + "step": 3390 + }, + { + "epoch": 2.13, + "grad_norm": 1.722476840019226, + "learning_rate": 8.928416511128194e-06, + "loss": 0.6383, + "step": 3400 + }, + { + "epoch": 2.13, + "eval_loss": 0.6974382996559143, + "eval_runtime": 65.0527, + "eval_samples_per_second": 69.175, + "eval_steps_per_second": 4.335, + "step": 3400 + }, + { + "epoch": 2.14, + "grad_norm": 1.8206970691680908, + "learning_rate": 8.922304971397369e-06, + "loss": 0.6447, + "step": 3410 + }, + { + "epoch": 2.15, + "grad_norm": 1.690631628036499, + "learning_rate": 8.916178157594453e-06, + "loss": 0.6441, + "step": 3420 + }, + { + "epoch": 2.15, + "grad_norm": 2.108876943588257, + "learning_rate": 8.910036093578223e-06, + "loss": 0.6453, + "step": 3430 + }, + { + "epoch": 2.16, + "grad_norm": 1.6356040239334106, + "learning_rate": 8.90387880326684e-06, + "loss": 0.6256, + "step": 3440 + }, + { + "epoch": 2.16, + "grad_norm": 1.3783752918243408, + "learning_rate": 8.897706310637766e-06, + "loss": 0.6244, + "step": 3450 + }, + { + "epoch": 2.17, + "grad_norm": 1.901208758354187, + "learning_rate": 8.89151863972765e-06, + "loss": 0.5975, + "step": 3460 + }, + { + "epoch": 2.18, + "grad_norm": 1.595009446144104, + "learning_rate": 8.88531581463226e-06, + "loss": 0.6777, + "step": 3470 + }, + { + "epoch": 2.18, + "grad_norm": 1.6791367530822754, + "learning_rate": 8.879097859506371e-06, + "loss": 0.6139, + "step": 3480 + }, + { + "epoch": 2.19, + "grad_norm": 1.5569841861724854, + "learning_rate": 8.872864798563676e-06, + "loss": 0.5775, + "step": 3490 + }, + { + "epoch": 2.2, + "grad_norm": 1.3804527521133423, + "learning_rate": 8.866616656076696e-06, + "loss": 0.624, + "step": 3500 + }, + { + "epoch": 2.2, + "grad_norm": 1.7905445098876953, + "learning_rate": 8.860353456376679e-06, + "loss": 0.6647, + "step": 3510 + }, + { + "epoch": 2.21, + "grad_norm": 1.802614688873291, + "learning_rate": 8.854075223853509e-06, + "loss": 0.7081, + "step": 3520 + }, + { + "epoch": 2.21, + "grad_norm": 1.7135951519012451, + "learning_rate": 8.847781982955613e-06, + "loss": 0.6974, + "step": 3530 + }, + { + "epoch": 2.22, + "grad_norm": 1.8468406200408936, + "learning_rate": 8.841473758189853e-06, + "loss": 0.6585, + "step": 3540 + }, + { + "epoch": 2.23, + "grad_norm": 1.7520302534103394, + "learning_rate": 8.835150574121455e-06, + "loss": 0.6116, + "step": 3550 + }, + { + "epoch": 2.23, + "grad_norm": 1.862479329109192, + "learning_rate": 8.828812455373891e-06, + "loss": 0.6333, + "step": 3560 + }, + { + "epoch": 2.24, + "grad_norm": 1.7767084836959839, + "learning_rate": 8.82245942662879e-06, + "loss": 0.6015, + "step": 3570 + }, + { + "epoch": 2.25, + "grad_norm": 1.6162598133087158, + "learning_rate": 8.816091512625845e-06, + "loss": 0.6719, + "step": 3580 + }, + { + "epoch": 2.25, + "grad_norm": 1.8923571109771729, + "learning_rate": 8.80970873816271e-06, + "loss": 0.6562, + "step": 3590 + }, + { + "epoch": 2.26, + "grad_norm": 1.7792338132858276, + "learning_rate": 8.803311128094918e-06, + "loss": 0.6436, + "step": 3600 + }, + { + "epoch": 2.26, + "eval_loss": 0.6980520486831665, + "eval_runtime": 65.0239, + "eval_samples_per_second": 69.205, + "eval_steps_per_second": 4.337, + "step": 3600 + }, + { + "epoch": 2.26, + "grad_norm": 1.819449543952942, + "learning_rate": 8.796898707335766e-06, + "loss": 0.6022, + "step": 3610 + }, + { + "epoch": 2.27, + "grad_norm": 1.923462986946106, + "learning_rate": 8.790471500856229e-06, + "loss": 0.6124, + "step": 3620 + }, + { + "epoch": 2.28, + "grad_norm": 2.0969247817993164, + "learning_rate": 8.784029533684857e-06, + "loss": 0.6209, + "step": 3630 + }, + { + "epoch": 2.28, + "grad_norm": 2.014631509780884, + "learning_rate": 8.777572830907685e-06, + "loss": 0.6179, + "step": 3640 + }, + { + "epoch": 2.29, + "grad_norm": 1.7940195798873901, + "learning_rate": 8.771101417668127e-06, + "loss": 0.6815, + "step": 3650 + }, + { + "epoch": 2.3, + "grad_norm": 1.7244881391525269, + "learning_rate": 8.764615319166885e-06, + "loss": 0.5767, + "step": 3660 + }, + { + "epoch": 2.3, + "grad_norm": 2.157749652862549, + "learning_rate": 8.758114560661846e-06, + "loss": 0.6281, + "step": 3670 + }, + { + "epoch": 2.31, + "grad_norm": 1.818303108215332, + "learning_rate": 8.751599167467985e-06, + "loss": 0.6368, + "step": 3680 + }, + { + "epoch": 2.31, + "grad_norm": 1.8076434135437012, + "learning_rate": 8.745069164957265e-06, + "loss": 0.6503, + "step": 3690 + }, + { + "epoch": 2.32, + "grad_norm": 1.7755082845687866, + "learning_rate": 8.738524578558547e-06, + "loss": 0.6503, + "step": 3700 + }, + { + "epoch": 2.33, + "grad_norm": 2.0641837120056152, + "learning_rate": 8.731965433757474e-06, + "loss": 0.6412, + "step": 3710 + }, + { + "epoch": 2.33, + "grad_norm": 2.174612045288086, + "learning_rate": 8.72539175609639e-06, + "loss": 0.6283, + "step": 3720 + }, + { + "epoch": 2.34, + "grad_norm": 1.905965805053711, + "learning_rate": 8.718803571174229e-06, + "loss": 0.6316, + "step": 3730 + }, + { + "epoch": 2.35, + "grad_norm": 2.290787935256958, + "learning_rate": 8.712200904646417e-06, + "loss": 0.6337, + "step": 3740 + }, + { + "epoch": 2.35, + "grad_norm": 1.7773081064224243, + "learning_rate": 8.705583782224776e-06, + "loss": 0.6683, + "step": 3750 + }, + { + "epoch": 2.36, + "grad_norm": 1.7513020038604736, + "learning_rate": 8.698952229677422e-06, + "loss": 0.6538, + "step": 3760 + }, + { + "epoch": 2.37, + "grad_norm": 1.8641185760498047, + "learning_rate": 8.692306272828661e-06, + "loss": 0.6179, + "step": 3770 + }, + { + "epoch": 2.37, + "grad_norm": 2.4094667434692383, + "learning_rate": 8.685645937558896e-06, + "loss": 0.6436, + "step": 3780 + }, + { + "epoch": 2.38, + "grad_norm": 2.295719623565674, + "learning_rate": 8.678971249804517e-06, + "loss": 0.6242, + "step": 3790 + }, + { + "epoch": 2.38, + "grad_norm": 2.3604509830474854, + "learning_rate": 8.67228223555781e-06, + "loss": 0.6444, + "step": 3800 + }, + { + "epoch": 2.38, + "eval_loss": 0.6968220472335815, + "eval_runtime": 65.0257, + "eval_samples_per_second": 69.203, + "eval_steps_per_second": 4.337, + "step": 3800 + }, + { + "epoch": 2.39, + "grad_norm": 1.799545407295227, + "learning_rate": 8.665578920866844e-06, + "loss": 0.6562, + "step": 3810 + }, + { + "epoch": 2.4, + "grad_norm": 1.87678062915802, + "learning_rate": 8.658861331835384e-06, + "loss": 0.6776, + "step": 3820 + }, + { + "epoch": 2.4, + "grad_norm": 1.9466888904571533, + "learning_rate": 8.652129494622776e-06, + "loss": 0.6245, + "step": 3830 + }, + { + "epoch": 2.41, + "grad_norm": 1.9451625347137451, + "learning_rate": 8.645383435443853e-06, + "loss": 0.6692, + "step": 3840 + }, + { + "epoch": 2.42, + "grad_norm": 1.9275856018066406, + "learning_rate": 8.638623180568829e-06, + "loss": 0.6314, + "step": 3850 + }, + { + "epoch": 2.42, + "grad_norm": 1.7316443920135498, + "learning_rate": 8.631848756323198e-06, + "loss": 0.6289, + "step": 3860 + }, + { + "epoch": 2.43, + "grad_norm": 2.5170657634735107, + "learning_rate": 8.625060189087636e-06, + "loss": 0.6367, + "step": 3870 + }, + { + "epoch": 2.43, + "grad_norm": 2.2198870182037354, + "learning_rate": 8.618257505297887e-06, + "loss": 0.6262, + "step": 3880 + }, + { + "epoch": 2.44, + "grad_norm": 1.9207948446273804, + "learning_rate": 8.611440731444673e-06, + "loss": 0.6598, + "step": 3890 + }, + { + "epoch": 2.45, + "grad_norm": 2.0086634159088135, + "learning_rate": 8.604609894073583e-06, + "loss": 0.6465, + "step": 3900 + }, + { + "epoch": 2.45, + "grad_norm": 2.0597639083862305, + "learning_rate": 8.597765019784972e-06, + "loss": 0.665, + "step": 3910 + }, + { + "epoch": 2.46, + "grad_norm": 1.8585723638534546, + "learning_rate": 8.590906135233854e-06, + "loss": 0.6207, + "step": 3920 + }, + { + "epoch": 2.47, + "grad_norm": 2.1862194538116455, + "learning_rate": 8.584033267129807e-06, + "loss": 0.6626, + "step": 3930 + }, + { + "epoch": 2.47, + "grad_norm": 2.048553228378296, + "learning_rate": 8.577146442236856e-06, + "loss": 0.6141, + "step": 3940 + }, + { + "epoch": 2.48, + "grad_norm": 2.2547719478607178, + "learning_rate": 8.570245687373384e-06, + "loss": 0.6651, + "step": 3950 + }, + { + "epoch": 2.48, + "grad_norm": 1.9522244930267334, + "learning_rate": 8.563331029412013e-06, + "loss": 0.6725, + "step": 3960 + }, + { + "epoch": 2.49, + "grad_norm": 1.7376751899719238, + "learning_rate": 8.556402495279506e-06, + "loss": 0.6066, + "step": 3970 + }, + { + "epoch": 2.5, + "grad_norm": 1.900639295578003, + "learning_rate": 8.549460111956665e-06, + "loss": 0.6752, + "step": 3980 + }, + { + "epoch": 2.5, + "grad_norm": 2.1750218868255615, + "learning_rate": 8.542503906478224e-06, + "loss": 0.6554, + "step": 3990 + }, + { + "epoch": 2.51, + "grad_norm": 1.6551765203475952, + "learning_rate": 8.535533905932739e-06, + "loss": 0.6368, + "step": 4000 + }, + { + "epoch": 2.51, + "eval_loss": 0.6986888647079468, + "eval_runtime": 65.007, + "eval_samples_per_second": 69.223, + "eval_steps_per_second": 4.338, + "step": 4000 + }, + { + "epoch": 2.51, + "step": 4000, + "total_flos": 9.03392259225944e+17, + "train_loss": 0.7081527805328369, + "train_runtime": 4312.5386, + "train_samples_per_second": 59.13, + "train_steps_per_second": 3.696 + } + ], + "logging_steps": 10, + "max_steps": 15940, + "num_input_tokens_seen": 0, + "num_train_epochs": 10, + "save_steps": 1000, + "total_flos": 9.03392259225944e+17, + "train_batch_size": 8, + "trial_name": null, + "trial_params": null +} diff --git a/llama2_13b_peft/news_commentary_de/training_args.bin b/llama2_13b_peft/news_commentary_de/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..e61aabdc538aa860ef0be76ab409f21896bcce75 --- /dev/null +++ b/llama2_13b_peft/news_commentary_de/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:190c16756cb1d693a35940640a359e26ac9a5176a57e5fc3919e9b137d670ef2 +size 5112 diff --git a/llama2_13b_peft/news_commentary_de/training_eval_loss.png b/llama2_13b_peft/news_commentary_de/training_eval_loss.png new file mode 100644 index 0000000000000000000000000000000000000000..82dd253be4cc097686530cf0de0f84823c1046b3 Binary files /dev/null and b/llama2_13b_peft/news_commentary_de/training_eval_loss.png differ diff --git a/llama2_13b_peft/news_commentary_de/training_loss.png b/llama2_13b_peft/news_commentary_de/training_loss.png new file mode 100644 index 0000000000000000000000000000000000000000..6b137300f6a1622980a55115a5f0afa9777137c6 Binary files /dev/null and b/llama2_13b_peft/news_commentary_de/training_loss.png differ diff --git a/llama2_13b_peft/news_commentary_it/README.md b/llama2_13b_peft/news_commentary_it/README.md new file mode 100644 index 0000000000000000000000000000000000000000..979c7cd498f684b3d656289b32fb7036d574cd07 --- /dev/null +++ b/llama2_13b_peft/news_commentary_it/README.md @@ -0,0 +1,67 @@ +--- +license: other +library_name: peft +tags: +- llama-factory +- lora +- generated_from_trainer +base_model: /data1/model/llama2/meta-llama/Llama2-13b +model-index: +- name: news_commentary_it_no_sys + results: [] +--- + + + +# news_commentary_it_no_sys + +This model is a fine-tuned version of [/data1/model/llama2/meta-llama/Llama2-13b](https://huggingface.co//data1/model/llama2/meta-llama/Llama2-13b) on the news_commentary_it_no_sys dataset. +It achieves the following results on the evaluation set: +- Loss: 0.6415 + +## Model description + +More information needed + +## Intended uses & limitations + +More information needed + +## Training and evaluation data + +More information needed + +## Training procedure + +### Training hyperparameters + +The following hyperparameters were used during training: +- learning_rate: 5e-05 +- train_batch_size: 4 +- eval_batch_size: 4 +- seed: 42 +- optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08 +- lr_scheduler_type: cosine +- lr_scheduler_warmup_steps: 20 +- num_epochs: 5.0 + +### Training results + +| Training Loss | Epoch | Step | Validation Loss | +|:-------------:|:------:|:-----:|:---------------:| +| 0.7236 | 0.2807 | 2000 | 0.6710 | +| 0.724 | 0.5614 | 4000 | 0.6521 | +| 0.6455 | 0.8421 | 6000 | 0.6415 | +| 0.5533 | 1.1228 | 8000 | 0.6548 | +| 0.5192 | 1.4035 | 10000 | 0.6501 | +| 0.4796 | 1.6842 | 12000 | 0.6500 | + + +### Framework versions + +- PEFT 0.10.0 +- Transformers 4.40.0 +- Pytorch 2.2.1 +- Datasets 2.18.0 +- Tokenizers 0.19.1 \ No newline at end of file diff --git a/llama2_13b_peft/news_commentary_it/adapter_config.json b/llama2_13b_peft/news_commentary_it/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..e6019441aab7d98b76c7a4c89053d37c16d3508f --- /dev/null +++ b/llama2_13b_peft/news_commentary_it/adapter_config.json @@ -0,0 +1,34 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "/data1/model/llama2/meta-llama/Llama2-13b", + "bias": "none", + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 16, + "lora_dropout": 0.0, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "r": 8, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "down_proj", + "up_proj", + "k_proj", + "gate_proj", + "q_proj", + "o_proj", + "v_proj" + ], + "task_type": "CAUSAL_LM", + "use_dora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/llama2_13b_peft/news_commentary_it/adapter_model.safetensors b/llama2_13b_peft/news_commentary_it/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..78cab5c59f85c013d054f0930539620891acdaaf --- /dev/null +++ b/llama2_13b_peft/news_commentary_it/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:edef1dfcb7b9574ec3ab4820df092cc5ff382071e97048d16d7f69d7094885d3 +size 125248064 diff --git a/llama2_13b_peft/news_commentary_it/all_results.json b/llama2_13b_peft/news_commentary_it/all_results.json new file mode 100644 index 0000000000000000000000000000000000000000..ab3f9de2e955481cb5f26fa6056e77bb1df25784 --- /dev/null +++ b/llama2_13b_peft/news_commentary_it/all_results.json @@ -0,0 +1,12 @@ +{ + "epoch": 1.6842105263157894, + "eval_loss": 0.6415141820907593, + "eval_runtime": 119.5773, + "eval_samples_per_second": 12.544, + "eval_steps_per_second": 3.136, + "total_flos": 6.933368738955264e+17, + "train_loss": 0.6038338423768679, + "train_runtime": 5861.7175, + "train_samples_per_second": 24.31, + "train_steps_per_second": 6.078 +} \ No newline at end of file diff --git a/llama2_13b_peft/news_commentary_it/eval_results.json b/llama2_13b_peft/news_commentary_it/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..ccc32e68e72cfd87a3aca4b688dfd1c5f2693feb --- /dev/null +++ b/llama2_13b_peft/news_commentary_it/eval_results.json @@ -0,0 +1,7 @@ +{ + "epoch": 1.6842105263157894, + "eval_loss": 0.6415141820907593, + "eval_runtime": 119.5773, + "eval_samples_per_second": 12.544, + "eval_steps_per_second": 3.136 +} \ No newline at end of file diff --git a/llama2_13b_peft/news_commentary_it/special_tokens_map.json b/llama2_13b_peft/news_commentary_it/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..72ecfeeb7e14d244c936169d2ed139eeae235ef1 --- /dev/null +++ b/llama2_13b_peft/news_commentary_it/special_tokens_map.json @@ -0,0 +1,24 @@ +{ + "bos_token": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": "", + "unk_token": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + } +} diff --git a/llama2_13b_peft/news_commentary_it/tokenizer.model b/llama2_13b_peft/news_commentary_it/tokenizer.model new file mode 100644 index 0000000000000000000000000000000000000000..6c00c742ce03c627d6cd5b795984876fa49fa899 --- /dev/null +++ b/llama2_13b_peft/news_commentary_it/tokenizer.model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9e556afd44213b6bd1be2b850ebbbd98f5481437a8021afaf58ee7fb1818d347 +size 499723 diff --git a/llama2_13b_peft/news_commentary_it/tokenizer_config.json b/llama2_13b_peft/news_commentary_it/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..a40266f39e5b5fed14de34710d35eb9e98d6bdad --- /dev/null +++ b/llama2_13b_peft/news_commentary_it/tokenizer_config.json @@ -0,0 +1,45 @@ +{ + "add_bos_token": true, + "add_eos_token": false, + "add_prefix_space": true, + "added_tokens_decoder": { + "0": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "1": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "2": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + } + }, + "bos_token": "", + "chat_template": "{% if messages[0]['role'] == 'system' %}{% set system_message = messages[0]['content'] %}{% endif %}{% if system_message is defined %}{{ system_message }}{% endif %}{% for message in messages %}{% set content = message['content'] %}{% if message['role'] == 'user' %}{{ content }}{% elif message['role'] == 'assistant' %}{{ content + '\\n' }}{% endif %}{% endfor %}", + "clean_up_tokenization_spaces": false, + "eos_token": "", + "legacy": true, + "model_max_length": 1000000000000000019884624838656, + "pad_token": "", + "padding_side": "right", + "sp_model_kwargs": {}, + "spaces_between_special_tokens": false, + "split_special_tokens": false, + "tokenizer_class": "LlamaTokenizer", + "unk_token": "", + "use_default_system_prompt": false +} diff --git a/llama2_13b_peft/news_commentary_it/train_results.json b/llama2_13b_peft/news_commentary_it/train_results.json new file mode 100644 index 0000000000000000000000000000000000000000..d54650ad4153608c2e0fd8787c597430eebf1c9c --- /dev/null +++ b/llama2_13b_peft/news_commentary_it/train_results.json @@ -0,0 +1,8 @@ +{ + "epoch": 1.6842105263157894, + "total_flos": 6.933368738955264e+17, + "train_loss": 0.6038338423768679, + "train_runtime": 5861.7175, + "train_samples_per_second": 24.31, + "train_steps_per_second": 6.078 +} \ No newline at end of file diff --git a/llama2_13b_peft/news_commentary_it/trainer_log.jsonl b/llama2_13b_peft/news_commentary_it/trainer_log.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..a78faae2e7efd81f213aed28ad03e19246d508ef --- /dev/null +++ b/llama2_13b_peft/news_commentary_it/trainer_log.jsonl @@ -0,0 +1,1208 @@ +{"current_steps": 10, "total_steps": 35625, "loss": 1.3423, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 2.5e-05, "epoch": 0.0014035087719298245, "percentage": 0.03, "elapsed_time": "0:00:05", "remaining_time": "5:16:28"} +{"current_steps": 20, "total_steps": 35625, "loss": 1.3855, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 5e-05, "epoch": 0.002807017543859649, "percentage": 0.06, "elapsed_time": "0:00:08", "remaining_time": "4:19:15"} +{"current_steps": 30, "total_steps": 35625, "loss": 0.9621, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.999999026832157e-05, "epoch": 0.004210526315789474, "percentage": 0.08, "elapsed_time": "0:00:12", "remaining_time": "4:01:34"} +{"current_steps": 40, "total_steps": 35625, "loss": 0.8217, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.9999961073293845e-05, "epoch": 0.005614035087719298, "percentage": 0.11, "elapsed_time": "0:00:15", "remaining_time": "3:47:11"} +{"current_steps": 50, "total_steps": 35625, "loss": 0.7743, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.9999912414939555e-05, "epoch": 0.007017543859649123, "percentage": 0.14, "elapsed_time": "0:00:20", "remaining_time": "4:02:42"} +{"current_steps": 60, "total_steps": 35625, "loss": 0.7671, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.9999844293296585e-05, "epoch": 0.008421052631578947, "percentage": 0.17, "elapsed_time": "0:00:25", "remaining_time": "4:07:49"} +{"current_steps": 70, "total_steps": 35625, "loss": 0.7657, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.999975670841798e-05, "epoch": 0.009824561403508772, "percentage": 0.2, "elapsed_time": "0:00:28", "remaining_time": "3:59:45"} +{"current_steps": 80, "total_steps": 35625, "loss": 0.6544, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.9999649660371906e-05, "epoch": 0.011228070175438596, "percentage": 0.22, "elapsed_time": "0:00:31", "remaining_time": "3:53:14"} +{"current_steps": 90, "total_steps": 35625, "loss": 0.7627, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.9999523149241714e-05, "epoch": 0.01263157894736842, "percentage": 0.25, "elapsed_time": "0:00:35", "remaining_time": "3:55:41"} +{"current_steps": 100, "total_steps": 35625, "loss": 0.7428, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.99993771751259e-05, "epoch": 0.014035087719298246, "percentage": 0.28, "elapsed_time": "0:00:39", "remaining_time": "3:52:59"} +{"current_steps": 110, "total_steps": 35625, "loss": 0.7024, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.999921173813812e-05, "epoch": 0.015438596491228071, "percentage": 0.31, "elapsed_time": "0:00:42", "remaining_time": "3:49:59"} +{"current_steps": 120, "total_steps": 35625, "loss": 0.8205, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.999902683840715e-05, "epoch": 0.016842105263157894, "percentage": 0.34, "elapsed_time": "0:00:46", "remaining_time": "3:48:34"} +{"current_steps": 130, "total_steps": 35625, "loss": 0.7359, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.9998822476076955e-05, "epoch": 0.018245614035087718, "percentage": 0.36, "elapsed_time": "0:00:51", "remaining_time": "3:54:03"} +{"current_steps": 140, "total_steps": 35625, "loss": 0.6837, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.999859865130664e-05, "epoch": 0.019649122807017545, "percentage": 0.39, "elapsed_time": "0:00:54", "remaining_time": "3:50:53"} +{"current_steps": 150, "total_steps": 35625, "loss": 0.7091, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.9998355364270445e-05, "epoch": 0.021052631578947368, "percentage": 0.42, "elapsed_time": "0:00:57", "remaining_time": "3:47:49"} +{"current_steps": 160, "total_steps": 35625, "loss": 0.7608, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.999809261515779e-05, "epoch": 0.02245614035087719, "percentage": 0.45, "elapsed_time": "0:01:00", "remaining_time": "3:44:14"} +{"current_steps": 170, "total_steps": 35625, "loss": 0.7725, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.9997810404173234e-05, "epoch": 0.023859649122807018, "percentage": 0.48, "elapsed_time": "0:01:04", "remaining_time": "3:44:41"} +{"current_steps": 180, "total_steps": 35625, "loss": 0.7884, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.999750873153648e-05, "epoch": 0.02526315789473684, "percentage": 0.51, "elapsed_time": "0:01:10", "remaining_time": "3:50:58"} +{"current_steps": 190, "total_steps": 35625, "loss": 0.7266, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.9997187597482405e-05, "epoch": 0.02666666666666667, "percentage": 0.53, "elapsed_time": "0:01:13", "remaining_time": "3:48:23"} +{"current_steps": 200, "total_steps": 35625, "loss": 0.6825, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.9996847002261006e-05, "epoch": 0.028070175438596492, "percentage": 0.56, "elapsed_time": "0:01:16", "remaining_time": "3:46:53"} +{"current_steps": 210, "total_steps": 35625, "loss": 0.6162, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.999648694613746e-05, "epoch": 0.029473684210526315, "percentage": 0.59, "elapsed_time": "0:01:20", "remaining_time": "3:46:31"} +{"current_steps": 220, "total_steps": 35625, "loss": 0.6696, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.9996107429392083e-05, "epoch": 0.030877192982456142, "percentage": 0.62, "elapsed_time": "0:01:23", "remaining_time": "3:44:22"} +{"current_steps": 230, "total_steps": 35625, "loss": 0.7512, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.9995708452320325e-05, "epoch": 0.032280701754385965, "percentage": 0.65, "elapsed_time": "0:01:26", "remaining_time": "3:42:35"} +{"current_steps": 240, "total_steps": 35625, "loss": 0.7137, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.999529001523282e-05, "epoch": 0.03368421052631579, "percentage": 0.67, "elapsed_time": "0:01:29", "remaining_time": "3:40:42"} +{"current_steps": 250, "total_steps": 35625, "loss": 0.7676, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.9994852118455335e-05, "epoch": 0.03508771929824561, "percentage": 0.7, "elapsed_time": "0:01:33", "remaining_time": "3:40:52"} +{"current_steps": 260, "total_steps": 35625, "loss": 0.7208, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.9994394762328786e-05, "epoch": 0.036491228070175435, "percentage": 0.73, "elapsed_time": "0:01:38", "remaining_time": "3:43:28"} +{"current_steps": 270, "total_steps": 35625, "loss": 0.7029, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.999391794720923e-05, "epoch": 0.037894736842105266, "percentage": 0.76, "elapsed_time": "0:01:41", "remaining_time": "3:41:42"} +{"current_steps": 280, "total_steps": 35625, "loss": 0.6751, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.9993421673467906e-05, "epoch": 0.03929824561403509, "percentage": 0.79, "elapsed_time": "0:01:45", "remaining_time": "3:42:22"} +{"current_steps": 290, "total_steps": 35625, "loss": 0.7652, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.9992905941491155e-05, "epoch": 0.04070175438596491, "percentage": 0.81, "elapsed_time": "0:01:49", "remaining_time": "3:43:20"} +{"current_steps": 300, "total_steps": 35625, "loss": 0.7609, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.9992370751680514e-05, "epoch": 0.042105263157894736, "percentage": 0.84, "elapsed_time": "0:01:53", "remaining_time": "3:42:16"} +{"current_steps": 310, "total_steps": 35625, "loss": 0.678, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.999181610445263e-05, "epoch": 0.04350877192982456, "percentage": 0.87, "elapsed_time": "0:01:56", "remaining_time": "3:41:36"} +{"current_steps": 320, "total_steps": 35625, "loss": 0.7089, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.9991242000239316e-05, "epoch": 0.04491228070175438, "percentage": 0.9, "elapsed_time": "0:01:59", "remaining_time": "3:40:24"} +{"current_steps": 330, "total_steps": 35625, "loss": 0.7034, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.9990648439487544e-05, "epoch": 0.04631578947368421, "percentage": 0.93, "elapsed_time": "0:02:03", "remaining_time": "3:39:28"} +{"current_steps": 340, "total_steps": 35625, "loss": 0.6789, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.999003542265941e-05, "epoch": 0.047719298245614036, "percentage": 0.95, "elapsed_time": "0:02:07", "remaining_time": "3:40:53"} +{"current_steps": 350, "total_steps": 35625, "loss": 0.6895, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.998940295023218e-05, "epoch": 0.04912280701754386, "percentage": 0.98, "elapsed_time": "0:02:11", "remaining_time": "3:41:23"} +{"current_steps": 360, "total_steps": 35625, "loss": 0.7472, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.9988751022698244e-05, "epoch": 0.05052631578947368, "percentage": 1.01, "elapsed_time": "0:02:16", "remaining_time": "3:42:40"} +{"current_steps": 370, "total_steps": 35625, "loss": 0.7637, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.9988079640565155e-05, "epoch": 0.051929824561403506, "percentage": 1.04, "elapsed_time": "0:02:21", "remaining_time": "3:43:59"} +{"current_steps": 380, "total_steps": 35625, "loss": 0.8042, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.998738880435561e-05, "epoch": 0.05333333333333334, "percentage": 1.07, "elapsed_time": "0:02:24", "remaining_time": "3:42:49"} +{"current_steps": 390, "total_steps": 35625, "loss": 0.7878, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.9986678514607434e-05, "epoch": 0.05473684210526316, "percentage": 1.09, "elapsed_time": "0:02:27", "remaining_time": "3:41:40"} +{"current_steps": 400, "total_steps": 35625, "loss": 0.6831, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.998594877187362e-05, "epoch": 0.056140350877192984, "percentage": 1.12, "elapsed_time": "0:02:30", "remaining_time": "3:41:15"} +{"current_steps": 410, "total_steps": 35625, "loss": 0.7905, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.998519957672232e-05, "epoch": 0.05754385964912281, "percentage": 1.15, "elapsed_time": "0:02:35", "remaining_time": "3:43:17"} +{"current_steps": 420, "total_steps": 35625, "loss": 0.6195, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.998443092973678e-05, "epoch": 0.05894736842105263, "percentage": 1.18, "elapsed_time": "0:02:39", "remaining_time": "3:42:34"} +{"current_steps": 430, "total_steps": 35625, "loss": 0.7603, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.998364283151542e-05, "epoch": 0.060350877192982454, "percentage": 1.21, "elapsed_time": "0:02:43", "remaining_time": "3:42:29"} +{"current_steps": 440, "total_steps": 35625, "loss": 0.7099, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.9982835282671816e-05, "epoch": 0.061754385964912284, "percentage": 1.24, "elapsed_time": "0:02:47", "remaining_time": "3:43:04"} +{"current_steps": 450, "total_steps": 35625, "loss": 0.6307, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.998200828383466e-05, "epoch": 0.06315789473684211, "percentage": 1.26, "elapsed_time": "0:02:51", "remaining_time": "3:43:01"} +{"current_steps": 460, "total_steps": 35625, "loss": 0.7084, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.99811618356478e-05, "epoch": 0.06456140350877193, "percentage": 1.29, "elapsed_time": "0:02:54", "remaining_time": "3:42:32"} +{"current_steps": 470, "total_steps": 35625, "loss": 0.6897, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.998029593877025e-05, "epoch": 0.06596491228070175, "percentage": 1.32, "elapsed_time": "0:02:59", "remaining_time": "3:43:30"} +{"current_steps": 480, "total_steps": 35625, "loss": 0.7054, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.9979410593876096e-05, "epoch": 0.06736842105263158, "percentage": 1.35, "elapsed_time": "0:03:03", "remaining_time": "3:43:29"} +{"current_steps": 490, "total_steps": 35625, "loss": 0.645, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.997850580165464e-05, "epoch": 0.0687719298245614, "percentage": 1.38, "elapsed_time": "0:03:07", "remaining_time": "3:44:32"} +{"current_steps": 500, "total_steps": 35625, "loss": 0.6455, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.997758156281029e-05, "epoch": 0.07017543859649122, "percentage": 1.4, "elapsed_time": "0:03:12", "remaining_time": "3:44:50"} +{"current_steps": 510, "total_steps": 35625, "loss": 0.6797, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.997663787806259e-05, "epoch": 0.07157894736842105, "percentage": 1.43, "elapsed_time": "0:03:16", "remaining_time": "3:45:49"} +{"current_steps": 520, "total_steps": 35625, "loss": 0.7582, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.997567474814623e-05, "epoch": 0.07298245614035087, "percentage": 1.46, "elapsed_time": "0:03:21", "remaining_time": "3:46:20"} +{"current_steps": 530, "total_steps": 35625, "loss": 0.667, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.997469217381105e-05, "epoch": 0.07438596491228071, "percentage": 1.49, "elapsed_time": "0:03:24", "remaining_time": "3:45:34"} +{"current_steps": 540, "total_steps": 35625, "loss": 0.6878, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.997369015582201e-05, "epoch": 0.07578947368421053, "percentage": 1.52, "elapsed_time": "0:03:28", "remaining_time": "3:45:15"} +{"current_steps": 550, "total_steps": 35625, "loss": 0.7693, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.9972668694959216e-05, "epoch": 0.07719298245614035, "percentage": 1.54, "elapsed_time": "0:03:31", "remaining_time": "3:44:42"} +{"current_steps": 560, "total_steps": 35625, "loss": 0.561, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.9971627792017915e-05, "epoch": 0.07859649122807018, "percentage": 1.57, "elapsed_time": "0:03:35", "remaining_time": "3:44:43"} +{"current_steps": 570, "total_steps": 35625, "loss": 0.6739, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.997056744780848e-05, "epoch": 0.08, "percentage": 1.6, "elapsed_time": "0:03:38", "remaining_time": "3:44:27"} +{"current_steps": 580, "total_steps": 35625, "loss": 0.6561, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.9969487663156434e-05, "epoch": 0.08140350877192983, "percentage": 1.63, "elapsed_time": "0:03:42", "remaining_time": "3:44:24"} +{"current_steps": 590, "total_steps": 35625, "loss": 0.6056, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.9968388438902415e-05, "epoch": 0.08280701754385965, "percentage": 1.66, "elapsed_time": "0:03:46", "remaining_time": "3:44:08"} +{"current_steps": 600, "total_steps": 35625, "loss": 0.6962, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.9967269775902204e-05, "epoch": 0.08421052631578947, "percentage": 1.68, "elapsed_time": "0:03:51", "remaining_time": "3:45:27"} +{"current_steps": 610, "total_steps": 35625, "loss": 0.6174, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.996613167502674e-05, "epoch": 0.0856140350877193, "percentage": 1.71, "elapsed_time": "0:03:55", "remaining_time": "3:44:58"} +{"current_steps": 620, "total_steps": 35625, "loss": 0.6806, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.996497413716205e-05, "epoch": 0.08701754385964912, "percentage": 1.74, "elapsed_time": "0:03:58", "remaining_time": "3:44:53"} +{"current_steps": 630, "total_steps": 35625, "loss": 0.7618, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.996379716320933e-05, "epoch": 0.08842105263157894, "percentage": 1.77, "elapsed_time": "0:04:03", "remaining_time": "3:45:05"} +{"current_steps": 640, "total_steps": 35625, "loss": 0.6796, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.996260075408489e-05, "epoch": 0.08982456140350877, "percentage": 1.8, "elapsed_time": "0:04:06", "remaining_time": "3:44:26"} +{"current_steps": 650, "total_steps": 35625, "loss": 0.6249, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.996138491072018e-05, "epoch": 0.0912280701754386, "percentage": 1.82, "elapsed_time": "0:04:10", "remaining_time": "3:44:22"} +{"current_steps": 660, "total_steps": 35625, "loss": 0.6905, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.996014963406177e-05, "epoch": 0.09263157894736843, "percentage": 1.85, "elapsed_time": "0:04:13", "remaining_time": "3:44:05"} +{"current_steps": 670, "total_steps": 35625, "loss": 0.7082, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.9958894925071364e-05, "epoch": 0.09403508771929825, "percentage": 1.88, "elapsed_time": "0:04:16", "remaining_time": "3:43:15"} +{"current_steps": 680, "total_steps": 35625, "loss": 0.6751, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.995762078472581e-05, "epoch": 0.09543859649122807, "percentage": 1.91, "elapsed_time": "0:04:20", "remaining_time": "3:42:48"} +{"current_steps": 690, "total_steps": 35625, "loss": 0.6223, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.995632721401705e-05, "epoch": 0.0968421052631579, "percentage": 1.94, "elapsed_time": "0:04:23", "remaining_time": "3:42:06"} +{"current_steps": 700, "total_steps": 35625, "loss": 0.622, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.995501421395219e-05, "epoch": 0.09824561403508772, "percentage": 1.96, "elapsed_time": "0:04:27", "remaining_time": "3:42:15"} +{"current_steps": 710, "total_steps": 35625, "loss": 0.7565, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.995368178555343e-05, "epoch": 0.09964912280701754, "percentage": 1.99, "elapsed_time": "0:04:30", "remaining_time": "3:41:31"} +{"current_steps": 720, "total_steps": 35625, "loss": 0.7486, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.9952329929858125e-05, "epoch": 0.10105263157894737, "percentage": 2.02, "elapsed_time": "0:04:33", "remaining_time": "3:40:55"} +{"current_steps": 730, "total_steps": 35625, "loss": 0.6825, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.995095864791873e-05, "epoch": 0.10245614035087719, "percentage": 2.05, "elapsed_time": "0:04:37", "remaining_time": "3:41:14"} +{"current_steps": 740, "total_steps": 35625, "loss": 0.7342, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.994956794080285e-05, "epoch": 0.10385964912280701, "percentage": 2.08, "elapsed_time": "0:04:43", "remaining_time": "3:42:22"} +{"current_steps": 750, "total_steps": 35625, "loss": 0.6289, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.994815780959318e-05, "epoch": 0.10526315789473684, "percentage": 2.11, "elapsed_time": "0:04:48", "remaining_time": "3:43:24"} +{"current_steps": 760, "total_steps": 35625, "loss": 0.5675, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.994672825538757e-05, "epoch": 0.10666666666666667, "percentage": 2.13, "elapsed_time": "0:04:52", "remaining_time": "3:43:26"} +{"current_steps": 770, "total_steps": 35625, "loss": 0.7527, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.994527927929897e-05, "epoch": 0.1080701754385965, "percentage": 2.16, "elapsed_time": "0:04:56", "remaining_time": "3:43:20"} +{"current_steps": 780, "total_steps": 35625, "loss": 0.6421, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.9943810882455454e-05, "epoch": 0.10947368421052632, "percentage": 2.19, "elapsed_time": "0:04:59", "remaining_time": "3:43:14"} +{"current_steps": 790, "total_steps": 35625, "loss": 0.7016, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.994232306600023e-05, "epoch": 0.11087719298245614, "percentage": 2.22, "elapsed_time": "0:05:04", "remaining_time": "3:43:41"} +{"current_steps": 800, "total_steps": 35625, "loss": 0.6911, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.99408158310916e-05, "epoch": 0.11228070175438597, "percentage": 2.25, "elapsed_time": "0:05:07", "remaining_time": "3:43:21"} +{"current_steps": 810, "total_steps": 35625, "loss": 0.697, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.9939289178903016e-05, "epoch": 0.11368421052631579, "percentage": 2.27, "elapsed_time": "0:05:11", "remaining_time": "3:43:19"} +{"current_steps": 820, "total_steps": 35625, "loss": 0.5691, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.993774311062301e-05, "epoch": 0.11508771929824561, "percentage": 2.3, "elapsed_time": "0:05:14", "remaining_time": "3:42:43"} +{"current_steps": 830, "total_steps": 35625, "loss": 0.7744, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.993617762745526e-05, "epoch": 0.11649122807017544, "percentage": 2.33, "elapsed_time": "0:05:17", "remaining_time": "3:42:09"} +{"current_steps": 840, "total_steps": 35625, "loss": 0.7652, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.993459273061855e-05, "epoch": 0.11789473684210526, "percentage": 2.36, "elapsed_time": "0:05:21", "remaining_time": "3:41:36"} +{"current_steps": 850, "total_steps": 35625, "loss": 0.6843, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.993298842134677e-05, "epoch": 0.11929824561403508, "percentage": 2.39, "elapsed_time": "0:05:24", "remaining_time": "3:41:30"} +{"current_steps": 860, "total_steps": 35625, "loss": 0.7147, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.993136470088894e-05, "epoch": 0.12070175438596491, "percentage": 2.41, "elapsed_time": "0:05:28", "remaining_time": "3:41:27"} +{"current_steps": 870, "total_steps": 35625, "loss": 0.641, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.992972157050916e-05, "epoch": 0.12210526315789473, "percentage": 2.44, "elapsed_time": "0:05:32", "remaining_time": "3:41:41"} +{"current_steps": 880, "total_steps": 35625, "loss": 0.6212, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.992805903148669e-05, "epoch": 0.12350877192982457, "percentage": 2.47, "elapsed_time": "0:05:35", "remaining_time": "3:41:01"} +{"current_steps": 890, "total_steps": 35625, "loss": 0.6817, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.992637708511586e-05, "epoch": 0.12491228070175439, "percentage": 2.5, "elapsed_time": "0:05:40", "remaining_time": "3:41:17"} +{"current_steps": 900, "total_steps": 35625, "loss": 0.7072, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.9924675732706123e-05, "epoch": 0.12631578947368421, "percentage": 2.53, "elapsed_time": "0:05:44", "remaining_time": "3:41:15"} +{"current_steps": 910, "total_steps": 35625, "loss": 0.6221, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.992295497558204e-05, "epoch": 0.12771929824561404, "percentage": 2.55, "elapsed_time": "0:05:49", "remaining_time": "3:42:02"} +{"current_steps": 920, "total_steps": 35625, "loss": 0.6162, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.992121481508328e-05, "epoch": 0.12912280701754386, "percentage": 2.58, "elapsed_time": "0:05:52", "remaining_time": "3:41:36"} +{"current_steps": 930, "total_steps": 35625, "loss": 0.7548, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.9919455252564624e-05, "epoch": 0.13052631578947368, "percentage": 2.61, "elapsed_time": "0:05:55", "remaining_time": "3:41:12"} +{"current_steps": 940, "total_steps": 35625, "loss": 0.6377, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.991767628939594e-05, "epoch": 0.1319298245614035, "percentage": 2.64, "elapsed_time": "0:05:58", "remaining_time": "3:40:42"} +{"current_steps": 950, "total_steps": 35625, "loss": 0.6971, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.991587792696223e-05, "epoch": 0.13333333333333333, "percentage": 2.67, "elapsed_time": "0:06:03", "remaining_time": "3:41:04"} +{"current_steps": 960, "total_steps": 35625, "loss": 0.7929, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.991406016666356e-05, "epoch": 0.13473684210526315, "percentage": 2.69, "elapsed_time": "0:06:06", "remaining_time": "3:40:29"} +{"current_steps": 970, "total_steps": 35625, "loss": 0.7556, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.9912223009915126e-05, "epoch": 0.13614035087719298, "percentage": 2.72, "elapsed_time": "0:06:09", "remaining_time": "3:39:56"} +{"current_steps": 980, "total_steps": 35625, "loss": 0.5883, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.991036645814722e-05, "epoch": 0.1375438596491228, "percentage": 2.75, "elapsed_time": "0:06:12", "remaining_time": "3:39:28"} +{"current_steps": 990, "total_steps": 35625, "loss": 0.6362, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.9908490512805236e-05, "epoch": 0.13894736842105262, "percentage": 2.78, "elapsed_time": "0:06:16", "remaining_time": "3:39:41"} +{"current_steps": 1000, "total_steps": 35625, "loss": 0.7057, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.990659517534966e-05, "epoch": 0.14035087719298245, "percentage": 2.81, "elapsed_time": "0:06:21", "remaining_time": "3:39:52"} +{"current_steps": 1010, "total_steps": 35625, "loss": 0.6051, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.990468044725606e-05, "epoch": 0.14175438596491227, "percentage": 2.84, "elapsed_time": "0:06:25", "remaining_time": "3:40:04"} +{"current_steps": 1020, "total_steps": 35625, "loss": 0.7434, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.990274633001514e-05, "epoch": 0.1431578947368421, "percentage": 2.86, "elapsed_time": "0:06:28", "remaining_time": "3:39:42"} +{"current_steps": 1030, "total_steps": 35625, "loss": 0.6681, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.990079282513266e-05, "epoch": 0.14456140350877192, "percentage": 2.89, "elapsed_time": "0:06:32", "remaining_time": "3:39:49"} +{"current_steps": 1040, "total_steps": 35625, "loss": 0.6655, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.9898819934129506e-05, "epoch": 0.14596491228070174, "percentage": 2.92, "elapsed_time": "0:06:38", "remaining_time": "3:40:37"} +{"current_steps": 1050, "total_steps": 35625, "loss": 0.7529, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.989682765854163e-05, "epoch": 0.14736842105263157, "percentage": 2.95, "elapsed_time": "0:06:41", "remaining_time": "3:40:26"} +{"current_steps": 1060, "total_steps": 35625, "loss": 0.6249, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.989481599992009e-05, "epoch": 0.14877192982456142, "percentage": 2.98, "elapsed_time": "0:06:44", "remaining_time": "3:39:53"} +{"current_steps": 1070, "total_steps": 35625, "loss": 0.6437, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.989278495983103e-05, "epoch": 0.15017543859649124, "percentage": 3.0, "elapsed_time": "0:06:48", "remaining_time": "3:40:05"} +{"current_steps": 1080, "total_steps": 35625, "loss": 0.6206, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.989073453985569e-05, "epoch": 0.15157894736842106, "percentage": 3.03, "elapsed_time": "0:06:52", "remaining_time": "3:40:05"} +{"current_steps": 1090, "total_steps": 35625, "loss": 0.6141, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.988866474159037e-05, "epoch": 0.1529824561403509, "percentage": 3.06, "elapsed_time": "0:06:56", "remaining_time": "3:39:51"} +{"current_steps": 1100, "total_steps": 35625, "loss": 0.6653, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.988657556664652e-05, "epoch": 0.1543859649122807, "percentage": 3.09, "elapsed_time": "0:06:59", "remaining_time": "3:39:34"} +{"current_steps": 1110, "total_steps": 35625, "loss": 0.6503, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.98844670166506e-05, "epoch": 0.15578947368421053, "percentage": 3.12, "elapsed_time": "0:07:03", "remaining_time": "3:39:15"} +{"current_steps": 1120, "total_steps": 35625, "loss": 0.6298, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.98823390932442e-05, "epoch": 0.15719298245614036, "percentage": 3.14, "elapsed_time": "0:07:06", "remaining_time": "3:38:51"} +{"current_steps": 1130, "total_steps": 35625, "loss": 0.6889, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.988019179808398e-05, "epoch": 0.15859649122807018, "percentage": 3.17, "elapsed_time": "0:07:10", "remaining_time": "3:38:49"} +{"current_steps": 1140, "total_steps": 35625, "loss": 0.7086, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.987802513284169e-05, "epoch": 0.16, "percentage": 3.2, "elapsed_time": "0:07:14", "remaining_time": "3:39:04"} +{"current_steps": 1150, "total_steps": 35625, "loss": 0.6727, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.9875839099204134e-05, "epoch": 0.16140350877192983, "percentage": 3.23, "elapsed_time": "0:07:18", "remaining_time": "3:39:09"} +{"current_steps": 1160, "total_steps": 35625, "loss": 0.6993, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.987363369887324e-05, "epoch": 0.16280701754385965, "percentage": 3.26, "elapsed_time": "0:07:22", "remaining_time": "3:39:05"} +{"current_steps": 1170, "total_steps": 35625, "loss": 0.5671, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.987140893356597e-05, "epoch": 0.16421052631578947, "percentage": 3.28, "elapsed_time": "0:07:26", "remaining_time": "3:39:21"} +{"current_steps": 1180, "total_steps": 35625, "loss": 0.6929, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.986916480501438e-05, "epoch": 0.1656140350877193, "percentage": 3.31, "elapsed_time": "0:07:31", "remaining_time": "3:39:29"} +{"current_steps": 1190, "total_steps": 35625, "loss": 0.5368, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.986690131496561e-05, "epoch": 0.16701754385964912, "percentage": 3.34, "elapsed_time": "0:07:34", "remaining_time": "3:39:07"} +{"current_steps": 1200, "total_steps": 35625, "loss": 0.6473, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.986461846518186e-05, "epoch": 0.16842105263157894, "percentage": 3.37, "elapsed_time": "0:07:40", "remaining_time": "3:39:57"} +{"current_steps": 1210, "total_steps": 35625, "loss": 0.6698, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.986231625744041e-05, "epoch": 0.16982456140350877, "percentage": 3.4, "elapsed_time": "0:07:44", "remaining_time": "3:40:09"} +{"current_steps": 1220, "total_steps": 35625, "loss": 0.6747, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.985999469353359e-05, "epoch": 0.1712280701754386, "percentage": 3.42, "elapsed_time": "0:07:48", "remaining_time": "3:40:01"} +{"current_steps": 1230, "total_steps": 35625, "loss": 0.6509, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.9857653775268853e-05, "epoch": 0.1726315789473684, "percentage": 3.45, "elapsed_time": "0:07:52", "remaining_time": "3:40:12"} +{"current_steps": 1240, "total_steps": 35625, "loss": 0.6895, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.985529350446865e-05, "epoch": 0.17403508771929824, "percentage": 3.48, "elapsed_time": "0:07:57", "remaining_time": "3:40:38"} +{"current_steps": 1250, "total_steps": 35625, "loss": 0.5882, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.985291388297055e-05, "epoch": 0.17543859649122806, "percentage": 3.51, "elapsed_time": "0:08:01", "remaining_time": "3:40:51"} +{"current_steps": 1260, "total_steps": 35625, "loss": 0.6688, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.985051491262716e-05, "epoch": 0.17684210526315788, "percentage": 3.54, "elapsed_time": "0:08:07", "remaining_time": "3:41:32"} +{"current_steps": 1270, "total_steps": 35625, "loss": 0.6135, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.984809659530617e-05, "epoch": 0.1782456140350877, "percentage": 3.56, "elapsed_time": "0:08:11", "remaining_time": "3:41:30"} +{"current_steps": 1280, "total_steps": 35625, "loss": 0.6849, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.9845658932890315e-05, "epoch": 0.17964912280701753, "percentage": 3.59, "elapsed_time": "0:08:14", "remaining_time": "3:41:04"} +{"current_steps": 1290, "total_steps": 35625, "loss": 0.6036, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.9843201927277407e-05, "epoch": 0.18105263157894738, "percentage": 3.62, "elapsed_time": "0:08:17", "remaining_time": "3:40:40"} +{"current_steps": 1300, "total_steps": 35625, "loss": 0.7348, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.984072558038031e-05, "epoch": 0.1824561403508772, "percentage": 3.65, "elapsed_time": "0:08:21", "remaining_time": "3:40:42"} +{"current_steps": 1310, "total_steps": 35625, "loss": 0.7679, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.983822989412693e-05, "epoch": 0.18385964912280703, "percentage": 3.68, "elapsed_time": "0:08:26", "remaining_time": "3:40:58"} +{"current_steps": 1320, "total_steps": 35625, "loss": 0.7083, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.983571487046026e-05, "epoch": 0.18526315789473685, "percentage": 3.71, "elapsed_time": "0:08:31", "remaining_time": "3:41:29"} +{"current_steps": 1330, "total_steps": 35625, "loss": 0.6417, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.9833180511338314e-05, "epoch": 0.18666666666666668, "percentage": 3.73, "elapsed_time": "0:08:34", "remaining_time": "3:41:03"} +{"current_steps": 1340, "total_steps": 35625, "loss": 0.6817, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.983062681873421e-05, "epoch": 0.1880701754385965, "percentage": 3.76, "elapsed_time": "0:08:37", "remaining_time": "3:40:45"} +{"current_steps": 1350, "total_steps": 35625, "loss": 0.6554, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.982805379463605e-05, "epoch": 0.18947368421052632, "percentage": 3.79, "elapsed_time": "0:08:40", "remaining_time": "3:40:16"} +{"current_steps": 1360, "total_steps": 35625, "loss": 0.6613, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.982546144104704e-05, "epoch": 0.19087719298245615, "percentage": 3.82, "elapsed_time": "0:08:45", "remaining_time": "3:40:32"} +{"current_steps": 1370, "total_steps": 35625, "loss": 0.6902, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.982284975998541e-05, "epoch": 0.19228070175438597, "percentage": 3.85, "elapsed_time": "0:08:48", "remaining_time": "3:40:13"} +{"current_steps": 1380, "total_steps": 35625, "loss": 0.81, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.982021875348445e-05, "epoch": 0.1936842105263158, "percentage": 3.87, "elapsed_time": "0:08:52", "remaining_time": "3:40:19"} +{"current_steps": 1390, "total_steps": 35625, "loss": 0.6796, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.9817568423592484e-05, "epoch": 0.19508771929824562, "percentage": 3.9, "elapsed_time": "0:08:56", "remaining_time": "3:40:21"} +{"current_steps": 1400, "total_steps": 35625, "loss": 0.6109, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.981489877237288e-05, "epoch": 0.19649122807017544, "percentage": 3.93, "elapsed_time": "0:09:00", "remaining_time": "3:40:04"} +{"current_steps": 1410, "total_steps": 35625, "loss": 0.7521, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.9812209801904064e-05, "epoch": 0.19789473684210526, "percentage": 3.96, "elapsed_time": "0:09:04", "remaining_time": "3:40:09"} +{"current_steps": 1420, "total_steps": 35625, "loss": 0.6742, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.980950151427948e-05, "epoch": 0.19929824561403509, "percentage": 3.99, "elapsed_time": "0:09:08", "remaining_time": "3:40:04"} +{"current_steps": 1430, "total_steps": 35625, "loss": 0.659, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.980677391160763e-05, "epoch": 0.2007017543859649, "percentage": 4.01, "elapsed_time": "0:09:11", "remaining_time": "3:39:41"} +{"current_steps": 1440, "total_steps": 35625, "loss": 0.6392, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.980402699601205e-05, "epoch": 0.20210526315789473, "percentage": 4.04, "elapsed_time": "0:09:15", "remaining_time": "3:39:42"} +{"current_steps": 1450, "total_steps": 35625, "loss": 0.6674, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.98012607696313e-05, "epoch": 0.20350877192982456, "percentage": 4.07, "elapsed_time": "0:09:20", "remaining_time": "3:40:00"} +{"current_steps": 1460, "total_steps": 35625, "loss": 0.6772, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.979847523461898e-05, "epoch": 0.20491228070175438, "percentage": 4.1, "elapsed_time": "0:09:24", "remaining_time": "3:40:03"} +{"current_steps": 1470, "total_steps": 35625, "loss": 0.6598, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.9795670393143735e-05, "epoch": 0.2063157894736842, "percentage": 4.13, "elapsed_time": "0:09:29", "remaining_time": "3:40:30"} +{"current_steps": 1480, "total_steps": 35625, "loss": 0.6784, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.9792846247389214e-05, "epoch": 0.20771929824561403, "percentage": 4.15, "elapsed_time": "0:09:32", "remaining_time": "3:40:16"} +{"current_steps": 1490, "total_steps": 35625, "loss": 0.673, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.979000279955413e-05, "epoch": 0.20912280701754385, "percentage": 4.18, "elapsed_time": "0:09:37", "remaining_time": "3:40:29"} +{"current_steps": 1500, "total_steps": 35625, "loss": 0.6018, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.97871400518522e-05, "epoch": 0.21052631578947367, "percentage": 4.21, "elapsed_time": "0:09:41", "remaining_time": "3:40:24"} +{"current_steps": 1510, "total_steps": 35625, "loss": 0.673, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.978425800651216e-05, "epoch": 0.2119298245614035, "percentage": 4.24, "elapsed_time": "0:09:45", "remaining_time": "3:40:20"} +{"current_steps": 1520, "total_steps": 35625, "loss": 0.6455, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.978135666577779e-05, "epoch": 0.21333333333333335, "percentage": 4.27, "elapsed_time": "0:09:48", "remaining_time": "3:39:58"} +{"current_steps": 1530, "total_steps": 35625, "loss": 0.6945, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.977843603190788e-05, "epoch": 0.21473684210526317, "percentage": 4.29, "elapsed_time": "0:09:51", "remaining_time": "3:39:39"} +{"current_steps": 1540, "total_steps": 35625, "loss": 0.6441, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.9775496107176245e-05, "epoch": 0.216140350877193, "percentage": 4.32, "elapsed_time": "0:09:54", "remaining_time": "3:39:19"} +{"current_steps": 1550, "total_steps": 35625, "loss": 0.6399, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.977253689387172e-05, "epoch": 0.21754385964912282, "percentage": 4.35, "elapsed_time": "0:09:57", "remaining_time": "3:38:58"} +{"current_steps": 1560, "total_steps": 35625, "loss": 0.7059, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.976955839429815e-05, "epoch": 0.21894736842105264, "percentage": 4.38, "elapsed_time": "0:10:01", "remaining_time": "3:38:53"} +{"current_steps": 1570, "total_steps": 35625, "loss": 0.7422, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.976656061077441e-05, "epoch": 0.22035087719298246, "percentage": 4.41, "elapsed_time": "0:10:06", "remaining_time": "3:39:11"} +{"current_steps": 1580, "total_steps": 35625, "loss": 0.7, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.976354354563435e-05, "epoch": 0.2217543859649123, "percentage": 4.44, "elapsed_time": "0:10:10", "remaining_time": "3:39:04"} +{"current_steps": 1590, "total_steps": 35625, "loss": 0.693, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.976050720122688e-05, "epoch": 0.2231578947368421, "percentage": 4.46, "elapsed_time": "0:10:13", "remaining_time": "3:39:01"} +{"current_steps": 1600, "total_steps": 35625, "loss": 0.6477, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.97574515799159e-05, "epoch": 0.22456140350877193, "percentage": 4.49, "elapsed_time": "0:10:17", "remaining_time": "3:38:41"} +{"current_steps": 1610, "total_steps": 35625, "loss": 0.6839, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.975437668408031e-05, "epoch": 0.22596491228070176, "percentage": 4.52, "elapsed_time": "0:10:21", "remaining_time": "3:38:57"} +{"current_steps": 1620, "total_steps": 35625, "loss": 0.5796, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.9751282516114024e-05, "epoch": 0.22736842105263158, "percentage": 4.55, "elapsed_time": "0:10:25", "remaining_time": "3:38:50"} +{"current_steps": 1630, "total_steps": 35625, "loss": 0.6967, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.9748169078425955e-05, "epoch": 0.2287719298245614, "percentage": 4.58, "elapsed_time": "0:10:28", "remaining_time": "3:38:33"} +{"current_steps": 1640, "total_steps": 35625, "loss": 0.5507, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.974503637344002e-05, "epoch": 0.23017543859649123, "percentage": 4.6, "elapsed_time": "0:10:31", "remaining_time": "3:38:12"} +{"current_steps": 1650, "total_steps": 35625, "loss": 0.6792, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.9741884403595135e-05, "epoch": 0.23157894736842105, "percentage": 4.63, "elapsed_time": "0:10:35", "remaining_time": "3:37:59"} +{"current_steps": 1660, "total_steps": 35625, "loss": 0.616, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.9738713171345225e-05, "epoch": 0.23298245614035087, "percentage": 4.66, "elapsed_time": "0:10:38", "remaining_time": "3:37:38"} +{"current_steps": 1670, "total_steps": 35625, "loss": 0.635, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.9735522679159195e-05, "epoch": 0.2343859649122807, "percentage": 4.69, "elapsed_time": "0:10:41", "remaining_time": "3:37:32"} +{"current_steps": 1680, "total_steps": 35625, "loss": 0.6902, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.9732312929520964e-05, "epoch": 0.23578947368421052, "percentage": 4.72, "elapsed_time": "0:10:45", "remaining_time": "3:37:25"} +{"current_steps": 1690, "total_steps": 35625, "loss": 0.6899, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.972908392492942e-05, "epoch": 0.23719298245614034, "percentage": 4.74, "elapsed_time": "0:10:48", "remaining_time": "3:37:05"} +{"current_steps": 1700, "total_steps": 35625, "loss": 0.7013, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.9725835667898455e-05, "epoch": 0.23859649122807017, "percentage": 4.77, "elapsed_time": "0:10:52", "remaining_time": "3:37:10"} +{"current_steps": 1710, "total_steps": 35625, "loss": 0.6379, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.972256816095695e-05, "epoch": 0.24, "percentage": 4.8, "elapsed_time": "0:10:56", "remaining_time": "3:36:56"} +{"current_steps": 1720, "total_steps": 35625, "loss": 0.6819, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.971928140664878e-05, "epoch": 0.24140350877192981, "percentage": 4.83, "elapsed_time": "0:10:59", "remaining_time": "3:36:41"} +{"current_steps": 1730, "total_steps": 35625, "loss": 0.6888, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.971597540753279e-05, "epoch": 0.24280701754385964, "percentage": 4.86, "elapsed_time": "0:11:04", "remaining_time": "3:36:54"} +{"current_steps": 1740, "total_steps": 35625, "loss": 0.6761, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.971265016618281e-05, "epoch": 0.24421052631578946, "percentage": 4.88, "elapsed_time": "0:11:07", "remaining_time": "3:36:42"} +{"current_steps": 1750, "total_steps": 35625, "loss": 0.5544, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.970930568518765e-05, "epoch": 0.24561403508771928, "percentage": 4.91, "elapsed_time": "0:11:12", "remaining_time": "3:36:51"} +{"current_steps": 1760, "total_steps": 35625, "loss": 0.7072, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.97059419671511e-05, "epoch": 0.24701754385964914, "percentage": 4.94, "elapsed_time": "0:11:18", "remaining_time": "3:37:26"} +{"current_steps": 1770, "total_steps": 35625, "loss": 0.5992, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.9702559014691965e-05, "epoch": 0.24842105263157896, "percentage": 4.97, "elapsed_time": "0:11:22", "remaining_time": "3:37:29"} +{"current_steps": 1780, "total_steps": 35625, "loss": 0.6277, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.969915683044395e-05, "epoch": 0.24982456140350878, "percentage": 5.0, "elapsed_time": "0:11:26", "remaining_time": "3:37:38"} +{"current_steps": 1790, "total_steps": 35625, "loss": 0.5501, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.9695735417055776e-05, "epoch": 0.2512280701754386, "percentage": 5.02, "elapsed_time": "0:11:29", "remaining_time": "3:37:19"} +{"current_steps": 1800, "total_steps": 35625, "loss": 0.7619, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.969229477719116e-05, "epoch": 0.25263157894736843, "percentage": 5.05, "elapsed_time": "0:11:34", "remaining_time": "3:37:27"} +{"current_steps": 1810, "total_steps": 35625, "loss": 0.6706, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.9688834913528724e-05, "epoch": 0.2540350877192982, "percentage": 5.08, "elapsed_time": "0:11:37", "remaining_time": "3:37:19"} +{"current_steps": 1820, "total_steps": 35625, "loss": 0.6825, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.9685355828762115e-05, "epoch": 0.2554385964912281, "percentage": 5.11, "elapsed_time": "0:11:41", "remaining_time": "3:37:13"} +{"current_steps": 1830, "total_steps": 35625, "loss": 0.7195, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.96818575255999e-05, "epoch": 0.25684210526315787, "percentage": 5.14, "elapsed_time": "0:11:44", "remaining_time": "3:36:55"} +{"current_steps": 1840, "total_steps": 35625, "loss": 0.6196, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.967834000676564e-05, "epoch": 0.2582456140350877, "percentage": 5.16, "elapsed_time": "0:11:48", "remaining_time": "3:36:40"} +{"current_steps": 1850, "total_steps": 35625, "loss": 0.5768, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.967480327499785e-05, "epoch": 0.2596491228070175, "percentage": 5.19, "elapsed_time": "0:11:52", "remaining_time": "3:36:53"} +{"current_steps": 1860, "total_steps": 35625, "loss": 0.6484, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.9671247333049975e-05, "epoch": 0.26105263157894737, "percentage": 5.22, "elapsed_time": "0:11:56", "remaining_time": "3:36:44"} +{"current_steps": 1870, "total_steps": 35625, "loss": 0.6132, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.966767218369046e-05, "epoch": 0.2624561403508772, "percentage": 5.25, "elapsed_time": "0:11:59", "remaining_time": "3:36:33"} +{"current_steps": 1880, "total_steps": 35625, "loss": 0.6435, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.966407782970267e-05, "epoch": 0.263859649122807, "percentage": 5.28, "elapsed_time": "0:12:02", "remaining_time": "3:36:16"} +{"current_steps": 1890, "total_steps": 35625, "loss": 0.6581, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.966046427388494e-05, "epoch": 0.26526315789473687, "percentage": 5.31, "elapsed_time": "0:12:06", "remaining_time": "3:35:58"} +{"current_steps": 1900, "total_steps": 35625, "loss": 0.593, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.965683151905054e-05, "epoch": 0.26666666666666666, "percentage": 5.33, "elapsed_time": "0:12:09", "remaining_time": "3:35:44"} +{"current_steps": 1910, "total_steps": 35625, "loss": 0.7037, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.965317956802769e-05, "epoch": 0.2680701754385965, "percentage": 5.36, "elapsed_time": "0:12:12", "remaining_time": "3:35:29"} +{"current_steps": 1920, "total_steps": 35625, "loss": 0.7054, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.964950842365957e-05, "epoch": 0.2694736842105263, "percentage": 5.39, "elapsed_time": "0:12:15", "remaining_time": "3:35:14"} +{"current_steps": 1930, "total_steps": 35625, "loss": 0.6463, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.9645818088804284e-05, "epoch": 0.27087719298245616, "percentage": 5.42, "elapsed_time": "0:12:21", "remaining_time": "3:35:45"} +{"current_steps": 1940, "total_steps": 35625, "loss": 0.7222, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.964210856633489e-05, "epoch": 0.27228070175438596, "percentage": 5.45, "elapsed_time": "0:12:26", "remaining_time": "3:36:02"} +{"current_steps": 1950, "total_steps": 35625, "loss": 0.7659, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.963837985913938e-05, "epoch": 0.2736842105263158, "percentage": 5.47, "elapsed_time": "0:12:29", "remaining_time": "3:35:50"} +{"current_steps": 1960, "total_steps": 35625, "loss": 0.7388, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.963463197012067e-05, "epoch": 0.2750877192982456, "percentage": 5.5, "elapsed_time": "0:12:32", "remaining_time": "3:35:32"} +{"current_steps": 1970, "total_steps": 35625, "loss": 0.6961, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.9630864902196626e-05, "epoch": 0.27649122807017545, "percentage": 5.53, "elapsed_time": "0:12:36", "remaining_time": "3:35:32"} +{"current_steps": 1980, "total_steps": 35625, "loss": 0.5755, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.962707865830004e-05, "epoch": 0.27789473684210525, "percentage": 5.56, "elapsed_time": "0:12:41", "remaining_time": "3:35:33"} +{"current_steps": 1990, "total_steps": 35625, "loss": 0.6845, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.9623273241378636e-05, "epoch": 0.2792982456140351, "percentage": 5.59, "elapsed_time": "0:12:44", "remaining_time": "3:35:19"} +{"current_steps": 2000, "total_steps": 35625, "loss": 0.7236, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.9619448654395055e-05, "epoch": 0.2807017543859649, "percentage": 5.61, "elapsed_time": "0:12:49", "remaining_time": "3:35:33"} +{"current_steps": 2000, "total_steps": 35625, "loss": null, "eval_loss": 0.6709622740745544, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": null, "epoch": 0.2807017543859649, "percentage": 5.61, "elapsed_time": "0:12:49", "remaining_time": "3:35:33"} +{"current_steps": 2010, "total_steps": 35625, "loss": 0.5944, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.9615604900326875e-05, "epoch": 0.28210526315789475, "percentage": 5.64, "elapsed_time": "0:13:36", "remaining_time": "3:47:41"} +{"current_steps": 2020, "total_steps": 35625, "loss": 0.6174, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.961174198216658e-05, "epoch": 0.28350877192982454, "percentage": 5.67, "elapsed_time": "0:13:40", "remaining_time": "3:47:36"} +{"current_steps": 2030, "total_steps": 35625, "loss": 0.6801, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.9607859902921595e-05, "epoch": 0.2849122807017544, "percentage": 5.7, "elapsed_time": "0:13:44", "remaining_time": "3:47:24"} +{"current_steps": 2040, "total_steps": 35625, "loss": 0.6657, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.960395866561425e-05, "epoch": 0.2863157894736842, "percentage": 5.73, "elapsed_time": "0:13:49", "remaining_time": "3:47:31"} +{"current_steps": 2050, "total_steps": 35625, "loss": 0.6536, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.960003827328179e-05, "epoch": 0.28771929824561404, "percentage": 5.75, "elapsed_time": "0:13:52", "remaining_time": "3:47:16"} +{"current_steps": 2060, "total_steps": 35625, "loss": 0.6361, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.959609872897637e-05, "epoch": 0.28912280701754384, "percentage": 5.78, "elapsed_time": "0:13:56", "remaining_time": "3:47:14"} +{"current_steps": 2070, "total_steps": 35625, "loss": 0.6369, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.959214003576507e-05, "epoch": 0.2905263157894737, "percentage": 5.81, "elapsed_time": "0:14:00", "remaining_time": "3:47:05"} +{"current_steps": 2080, "total_steps": 35625, "loss": 0.7563, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.958816219672986e-05, "epoch": 0.2919298245614035, "percentage": 5.84, "elapsed_time": "0:14:04", "remaining_time": "3:46:58"} +{"current_steps": 2090, "total_steps": 35625, "loss": 0.6575, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.9584165214967634e-05, "epoch": 0.29333333333333333, "percentage": 5.87, "elapsed_time": "0:14:07", "remaining_time": "3:46:37"} +{"current_steps": 2100, "total_steps": 35625, "loss": 0.6611, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.9580149093590165e-05, "epoch": 0.29473684210526313, "percentage": 5.89, "elapsed_time": "0:14:11", "remaining_time": "3:46:29"} +{"current_steps": 2110, "total_steps": 35625, "loss": 0.6456, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.957611383572415e-05, "epoch": 0.296140350877193, "percentage": 5.92, "elapsed_time": "0:14:14", "remaining_time": "3:46:15"} +{"current_steps": 2120, "total_steps": 35625, "loss": 0.7114, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.9572059444511175e-05, "epoch": 0.29754385964912283, "percentage": 5.95, "elapsed_time": "0:14:18", "remaining_time": "3:46:11"} +{"current_steps": 2130, "total_steps": 35625, "loss": 0.6338, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.956798592310773e-05, "epoch": 0.29894736842105263, "percentage": 5.98, "elapsed_time": "0:14:22", "remaining_time": "3:45:56"} +{"current_steps": 2140, "total_steps": 35625, "loss": 0.6323, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.956389327468518e-05, "epoch": 0.3003508771929825, "percentage": 6.01, "elapsed_time": "0:14:26", "remaining_time": "3:46:01"} +{"current_steps": 2150, "total_steps": 35625, "loss": 0.6231, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.9559781502429784e-05, "epoch": 0.3017543859649123, "percentage": 6.04, "elapsed_time": "0:14:30", "remaining_time": "3:45:55"} +{"current_steps": 2160, "total_steps": 35625, "loss": 0.6515, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.955565060954272e-05, "epoch": 0.3031578947368421, "percentage": 6.06, "elapsed_time": "0:14:34", "remaining_time": "3:45:46"} +{"current_steps": 2170, "total_steps": 35625, "loss": 0.6023, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.9551500599240006e-05, "epoch": 0.3045614035087719, "percentage": 6.09, "elapsed_time": "0:14:38", "remaining_time": "3:45:44"} +{"current_steps": 2180, "total_steps": 35625, "loss": 0.678, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.954733147475259e-05, "epoch": 0.3059649122807018, "percentage": 6.12, "elapsed_time": "0:14:41", "remaining_time": "3:45:27"} +{"current_steps": 2190, "total_steps": 35625, "loss": 0.6307, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.954314323932627e-05, "epoch": 0.30736842105263157, "percentage": 6.15, "elapsed_time": "0:14:46", "remaining_time": "3:45:28"} +{"current_steps": 2200, "total_steps": 35625, "loss": 0.6543, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.953893589622172e-05, "epoch": 0.3087719298245614, "percentage": 6.18, "elapsed_time": "0:14:50", "remaining_time": "3:45:22"} +{"current_steps": 2210, "total_steps": 35625, "loss": 0.7493, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.9534709448714514e-05, "epoch": 0.3101754385964912, "percentage": 6.2, "elapsed_time": "0:14:53", "remaining_time": "3:45:08"} +{"current_steps": 2220, "total_steps": 35625, "loss": 0.6856, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.9530463900095084e-05, "epoch": 0.31157894736842107, "percentage": 6.23, "elapsed_time": "0:14:57", "remaining_time": "3:45:02"} +{"current_steps": 2230, "total_steps": 35625, "loss": 0.5721, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.952619925366873e-05, "epoch": 0.31298245614035086, "percentage": 6.26, "elapsed_time": "0:15:00", "remaining_time": "3:44:46"} +{"current_steps": 2240, "total_steps": 35625, "loss": 0.7126, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.9521915512755635e-05, "epoch": 0.3143859649122807, "percentage": 6.29, "elapsed_time": "0:15:03", "remaining_time": "3:44:32"} +{"current_steps": 2250, "total_steps": 35625, "loss": 0.7522, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.951761268069082e-05, "epoch": 0.3157894736842105, "percentage": 6.32, "elapsed_time": "0:15:07", "remaining_time": "3:44:26"} +{"current_steps": 2260, "total_steps": 35625, "loss": 0.6825, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.95132907608242e-05, "epoch": 0.31719298245614036, "percentage": 6.34, "elapsed_time": "0:15:13", "remaining_time": "3:44:44"} +{"current_steps": 2270, "total_steps": 35625, "loss": 0.7592, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.950894975652055e-05, "epoch": 0.31859649122807016, "percentage": 6.37, "elapsed_time": "0:15:16", "remaining_time": "3:44:28"} +{"current_steps": 2280, "total_steps": 35625, "loss": 0.5719, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.950458967115946e-05, "epoch": 0.32, "percentage": 6.4, "elapsed_time": "0:15:21", "remaining_time": "3:44:42"} +{"current_steps": 2290, "total_steps": 35625, "loss": 0.6288, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.9500210508135436e-05, "epoch": 0.3214035087719298, "percentage": 6.43, "elapsed_time": "0:15:25", "remaining_time": "3:44:34"} +{"current_steps": 2300, "total_steps": 35625, "loss": 0.7081, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.9495812270857786e-05, "epoch": 0.32280701754385965, "percentage": 6.46, "elapsed_time": "0:15:30", "remaining_time": "3:44:35"} +{"current_steps": 2310, "total_steps": 35625, "loss": 0.6371, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.94913949627507e-05, "epoch": 0.32421052631578945, "percentage": 6.48, "elapsed_time": "0:15:33", "remaining_time": "3:44:18"} +{"current_steps": 2320, "total_steps": 35625, "loss": 0.6712, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.9486958587253195e-05, "epoch": 0.3256140350877193, "percentage": 6.51, "elapsed_time": "0:15:36", "remaining_time": "3:44:00"} +{"current_steps": 2330, "total_steps": 35625, "loss": 0.6232, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.9482503147819156e-05, "epoch": 0.3270175438596491, "percentage": 6.54, "elapsed_time": "0:15:40", "remaining_time": "3:43:52"} +{"current_steps": 2340, "total_steps": 35625, "loss": 0.6519, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.947802864791727e-05, "epoch": 0.32842105263157895, "percentage": 6.57, "elapsed_time": "0:15:44", "remaining_time": "3:43:48"} +{"current_steps": 2350, "total_steps": 35625, "loss": 0.7172, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.947353509103112e-05, "epoch": 0.3298245614035088, "percentage": 6.6, "elapsed_time": "0:15:47", "remaining_time": "3:43:39"} +{"current_steps": 2360, "total_steps": 35625, "loss": 0.6784, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.946902248065907e-05, "epoch": 0.3312280701754386, "percentage": 6.62, "elapsed_time": "0:15:51", "remaining_time": "3:43:37"} +{"current_steps": 2370, "total_steps": 35625, "loss": 0.612, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.946449082031435e-05, "epoch": 0.33263157894736844, "percentage": 6.65, "elapsed_time": "0:15:55", "remaining_time": "3:43:33"} +{"current_steps": 2380, "total_steps": 35625, "loss": 0.7573, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.9459940113525014e-05, "epoch": 0.33403508771929824, "percentage": 6.68, "elapsed_time": "0:15:59", "remaining_time": "3:43:18"} +{"current_steps": 2390, "total_steps": 35625, "loss": 0.7167, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.945537036383394e-05, "epoch": 0.3354385964912281, "percentage": 6.71, "elapsed_time": "0:16:02", "remaining_time": "3:43:04"} +{"current_steps": 2400, "total_steps": 35625, "loss": 0.5797, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.945078157479884e-05, "epoch": 0.3368421052631579, "percentage": 6.74, "elapsed_time": "0:16:05", "remaining_time": "3:42:47"} +{"current_steps": 2410, "total_steps": 35625, "loss": 0.603, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.944617374999224e-05, "epoch": 0.33824561403508774, "percentage": 6.76, "elapsed_time": "0:16:08", "remaining_time": "3:42:32"} +{"current_steps": 2420, "total_steps": 35625, "loss": 0.6496, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.944154689300148e-05, "epoch": 0.33964912280701753, "percentage": 6.79, "elapsed_time": "0:16:13", "remaining_time": "3:42:36"} +{"current_steps": 2430, "total_steps": 35625, "loss": 0.6945, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.943690100742875e-05, "epoch": 0.3410526315789474, "percentage": 6.82, "elapsed_time": "0:16:16", "remaining_time": "3:42:19"} +{"current_steps": 2440, "total_steps": 35625, "loss": 0.6489, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.943223609689101e-05, "epoch": 0.3424561403508772, "percentage": 6.85, "elapsed_time": "0:16:19", "remaining_time": "3:42:02"} +{"current_steps": 2450, "total_steps": 35625, "loss": 0.6066, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.9427552165020066e-05, "epoch": 0.34385964912280703, "percentage": 6.88, "elapsed_time": "0:16:23", "remaining_time": "3:41:58"} +{"current_steps": 2460, "total_steps": 35625, "loss": 0.6208, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.9422849215462506e-05, "epoch": 0.3452631578947368, "percentage": 6.91, "elapsed_time": "0:16:27", "remaining_time": "3:41:56"} +{"current_steps": 2470, "total_steps": 35625, "loss": 0.7249, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.9418127251879756e-05, "epoch": 0.3466666666666667, "percentage": 6.93, "elapsed_time": "0:16:33", "remaining_time": "3:42:09"} +{"current_steps": 2480, "total_steps": 35625, "loss": 0.7049, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.9413386277948006e-05, "epoch": 0.3480701754385965, "percentage": 6.96, "elapsed_time": "0:16:36", "remaining_time": "3:41:57"} +{"current_steps": 2490, "total_steps": 35625, "loss": 0.7138, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.9408626297358286e-05, "epoch": 0.3494736842105263, "percentage": 6.99, "elapsed_time": "0:16:41", "remaining_time": "3:42:02"} +{"current_steps": 2500, "total_steps": 35625, "loss": 0.6047, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.940384731381639e-05, "epoch": 0.3508771929824561, "percentage": 7.02, "elapsed_time": "0:16:44", "remaining_time": "3:41:45"} +{"current_steps": 2510, "total_steps": 35625, "loss": 0.6098, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.9399049331042925e-05, "epoch": 0.35228070175438597, "percentage": 7.05, "elapsed_time": "0:16:48", "remaining_time": "3:41:50"} +{"current_steps": 2520, "total_steps": 35625, "loss": 0.6862, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.939423235277328e-05, "epoch": 0.35368421052631577, "percentage": 7.07, "elapsed_time": "0:16:52", "remaining_time": "3:41:35"} +{"current_steps": 2530, "total_steps": 35625, "loss": 0.7044, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.938939638275765e-05, "epoch": 0.3550877192982456, "percentage": 7.1, "elapsed_time": "0:16:55", "remaining_time": "3:41:25"} +{"current_steps": 2540, "total_steps": 35625, "loss": 0.6377, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.938454142476099e-05, "epoch": 0.3564912280701754, "percentage": 7.13, "elapsed_time": "0:17:00", "remaining_time": "3:41:28"} +{"current_steps": 2550, "total_steps": 35625, "loss": 0.7172, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.9379667482563066e-05, "epoch": 0.35789473684210527, "percentage": 7.16, "elapsed_time": "0:17:05", "remaining_time": "3:41:46"} +{"current_steps": 2560, "total_steps": 35625, "loss": 0.5563, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.937477455995839e-05, "epoch": 0.35929824561403506, "percentage": 7.19, "elapsed_time": "0:17:09", "remaining_time": "3:41:36"} +{"current_steps": 2570, "total_steps": 35625, "loss": 0.7217, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.9369862660756286e-05, "epoch": 0.3607017543859649, "percentage": 7.21, "elapsed_time": "0:17:13", "remaining_time": "3:41:30"} +{"current_steps": 2580, "total_steps": 35625, "loss": 0.6424, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.9364931788780835e-05, "epoch": 0.36210526315789476, "percentage": 7.24, "elapsed_time": "0:17:20", "remaining_time": "3:42:00"} +{"current_steps": 2590, "total_steps": 35625, "loss": 0.6449, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.9359981947870874e-05, "epoch": 0.36350877192982456, "percentage": 7.27, "elapsed_time": "0:17:23", "remaining_time": "3:41:45"} +{"current_steps": 2600, "total_steps": 35625, "loss": 0.7963, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.9355013141880045e-05, "epoch": 0.3649122807017544, "percentage": 7.3, "elapsed_time": "0:17:26", "remaining_time": "3:41:33"} +{"current_steps": 2610, "total_steps": 35625, "loss": 0.716, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.9350025374676725e-05, "epoch": 0.3663157894736842, "percentage": 7.33, "elapsed_time": "0:17:30", "remaining_time": "3:41:25"} +{"current_steps": 2620, "total_steps": 35625, "loss": 0.5228, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.934501865014405e-05, "epoch": 0.36771929824561406, "percentage": 7.35, "elapsed_time": "0:17:34", "remaining_time": "3:41:20"} +{"current_steps": 2630, "total_steps": 35625, "loss": 0.6206, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.933999297217994e-05, "epoch": 0.36912280701754385, "percentage": 7.38, "elapsed_time": "0:17:37", "remaining_time": "3:41:04"} +{"current_steps": 2640, "total_steps": 35625, "loss": 0.6324, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.933494834469706e-05, "epoch": 0.3705263157894737, "percentage": 7.41, "elapsed_time": "0:17:40", "remaining_time": "3:40:51"} +{"current_steps": 2650, "total_steps": 35625, "loss": 0.7658, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.9329884771622817e-05, "epoch": 0.3719298245614035, "percentage": 7.44, "elapsed_time": "0:17:44", "remaining_time": "3:40:49"} +{"current_steps": 2660, "total_steps": 35625, "loss": 0.6711, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.9324802256899385e-05, "epoch": 0.37333333333333335, "percentage": 7.47, "elapsed_time": "0:17:48", "remaining_time": "3:40:45"} +{"current_steps": 2670, "total_steps": 35625, "loss": 0.5751, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.931970080448366e-05, "epoch": 0.37473684210526315, "percentage": 7.49, "elapsed_time": "0:17:53", "remaining_time": "3:40:48"} +{"current_steps": 2680, "total_steps": 35625, "loss": 0.6772, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.931458041834731e-05, "epoch": 0.376140350877193, "percentage": 7.52, "elapsed_time": "0:17:57", "remaining_time": "3:40:48"} +{"current_steps": 2690, "total_steps": 35625, "loss": 0.6141, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.9309441102476734e-05, "epoch": 0.3775438596491228, "percentage": 7.55, "elapsed_time": "0:18:01", "remaining_time": "3:40:37"} +{"current_steps": 2700, "total_steps": 35625, "loss": 0.627, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.930428286087306e-05, "epoch": 0.37894736842105264, "percentage": 7.58, "elapsed_time": "0:18:04", "remaining_time": "3:40:28"} +{"current_steps": 2710, "total_steps": 35625, "loss": 0.6991, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.929910569755215e-05, "epoch": 0.38035087719298244, "percentage": 7.61, "elapsed_time": "0:18:09", "remaining_time": "3:40:37"} +{"current_steps": 2720, "total_steps": 35625, "loss": 0.6379, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.929390961654462e-05, "epoch": 0.3817543859649123, "percentage": 7.64, "elapsed_time": "0:18:13", "remaining_time": "3:40:22"} +{"current_steps": 2730, "total_steps": 35625, "loss": 0.673, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.9288694621895776e-05, "epoch": 0.3831578947368421, "percentage": 7.66, "elapsed_time": "0:18:17", "remaining_time": "3:40:20"} +{"current_steps": 2740, "total_steps": 35625, "loss": 0.6562, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.928346071766569e-05, "epoch": 0.38456140350877194, "percentage": 7.69, "elapsed_time": "0:18:21", "remaining_time": "3:40:22"} +{"current_steps": 2750, "total_steps": 35625, "loss": 0.6517, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.927820790792912e-05, "epoch": 0.38596491228070173, "percentage": 7.72, "elapsed_time": "0:18:24", "remaining_time": "3:40:07"} +{"current_steps": 2760, "total_steps": 35625, "loss": 0.6506, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.9272936196775565e-05, "epoch": 0.3873684210526316, "percentage": 7.75, "elapsed_time": "0:18:28", "remaining_time": "3:40:01"} +{"current_steps": 2770, "total_steps": 35625, "loss": 0.7087, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.926764558830923e-05, "epoch": 0.3887719298245614, "percentage": 7.78, "elapsed_time": "0:18:34", "remaining_time": "3:40:14"} +{"current_steps": 2780, "total_steps": 35625, "loss": 0.6409, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.926233608664904e-05, "epoch": 0.39017543859649123, "percentage": 7.8, "elapsed_time": "0:18:37", "remaining_time": "3:40:05"} +{"current_steps": 2790, "total_steps": 35625, "loss": 0.7131, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.9257007695928624e-05, "epoch": 0.391578947368421, "percentage": 7.83, "elapsed_time": "0:18:41", "remaining_time": "3:39:53"} +{"current_steps": 2800, "total_steps": 35625, "loss": 0.7967, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.925166042029631e-05, "epoch": 0.3929824561403509, "percentage": 7.86, "elapsed_time": "0:18:45", "remaining_time": "3:39:57"} +{"current_steps": 2810, "total_steps": 35625, "loss": 0.6273, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.924629426391515e-05, "epoch": 0.39438596491228073, "percentage": 7.89, "elapsed_time": "0:18:50", "remaining_time": "3:39:58"} +{"current_steps": 2820, "total_steps": 35625, "loss": 0.6419, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.924090923096286e-05, "epoch": 0.3957894736842105, "percentage": 7.92, "elapsed_time": "0:18:54", "remaining_time": "3:39:54"} +{"current_steps": 2830, "total_steps": 35625, "loss": 0.6296, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.923550532563189e-05, "epoch": 0.3971929824561404, "percentage": 7.94, "elapsed_time": "0:18:57", "remaining_time": "3:39:47"} +{"current_steps": 2840, "total_steps": 35625, "loss": 0.6323, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.923008255212935e-05, "epoch": 0.39859649122807017, "percentage": 7.97, "elapsed_time": "0:19:01", "remaining_time": "3:39:41"} +{"current_steps": 2850, "total_steps": 35625, "loss": 0.6613, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.922464091467707e-05, "epoch": 0.4, "percentage": 8.0, "elapsed_time": "0:19:05", "remaining_time": "3:39:38"} +{"current_steps": 2860, "total_steps": 35625, "loss": 0.6119, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.921918041751155e-05, "epoch": 0.4014035087719298, "percentage": 8.03, "elapsed_time": "0:19:09", "remaining_time": "3:39:26"} +{"current_steps": 2870, "total_steps": 35625, "loss": 0.6575, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.9213701064883966e-05, "epoch": 0.40280701754385967, "percentage": 8.06, "elapsed_time": "0:19:12", "remaining_time": "3:39:10"} +{"current_steps": 2880, "total_steps": 35625, "loss": 0.7369, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.9208202861060185e-05, "epoch": 0.40421052631578946, "percentage": 8.08, "elapsed_time": "0:19:16", "remaining_time": "3:39:10"} +{"current_steps": 2890, "total_steps": 35625, "loss": 0.6551, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.920268581032074e-05, "epoch": 0.4056140350877193, "percentage": 8.11, "elapsed_time": "0:19:21", "remaining_time": "3:39:13"} +{"current_steps": 2900, "total_steps": 35625, "loss": 0.6584, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.919714991696086e-05, "epoch": 0.4070175438596491, "percentage": 8.14, "elapsed_time": "0:19:27", "remaining_time": "3:39:29"} +{"current_steps": 2910, "total_steps": 35625, "loss": 0.6674, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.9191595185290414e-05, "epoch": 0.40842105263157896, "percentage": 8.17, "elapsed_time": "0:19:31", "remaining_time": "3:39:26"} +{"current_steps": 2920, "total_steps": 35625, "loss": 0.7091, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.918602161963396e-05, "epoch": 0.40982456140350876, "percentage": 8.2, "elapsed_time": "0:19:36", "remaining_time": "3:39:36"} +{"current_steps": 2930, "total_steps": 35625, "loss": 0.5862, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.9180429224330706e-05, "epoch": 0.4112280701754386, "percentage": 8.22, "elapsed_time": "0:19:40", "remaining_time": "3:39:30"} +{"current_steps": 2940, "total_steps": 35625, "loss": 0.731, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.917481800373451e-05, "epoch": 0.4126315789473684, "percentage": 8.25, "elapsed_time": "0:19:45", "remaining_time": "3:39:38"} +{"current_steps": 2950, "total_steps": 35625, "loss": 0.6716, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.916918796221393e-05, "epoch": 0.41403508771929826, "percentage": 8.28, "elapsed_time": "0:19:50", "remaining_time": "3:39:44"} +{"current_steps": 2960, "total_steps": 35625, "loss": 0.6427, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.9163539104152124e-05, "epoch": 0.41543859649122805, "percentage": 8.31, "elapsed_time": "0:19:54", "remaining_time": "3:39:46"} +{"current_steps": 2970, "total_steps": 35625, "loss": 0.6184, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.9157871433946925e-05, "epoch": 0.4168421052631579, "percentage": 8.34, "elapsed_time": "0:19:58", "remaining_time": "3:39:33"} +{"current_steps": 2980, "total_steps": 35625, "loss": 0.6208, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.9152184956010813e-05, "epoch": 0.4182456140350877, "percentage": 8.36, "elapsed_time": "0:20:02", "remaining_time": "3:39:36"} +{"current_steps": 2990, "total_steps": 35625, "loss": 0.6517, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.91464796747709e-05, "epoch": 0.41964912280701755, "percentage": 8.39, "elapsed_time": "0:20:06", "remaining_time": "3:39:26"} +{"current_steps": 3000, "total_steps": 35625, "loss": 0.6656, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.914075559466895e-05, "epoch": 0.42105263157894735, "percentage": 8.42, "elapsed_time": "0:20:10", "remaining_time": "3:39:20"} +{"current_steps": 3010, "total_steps": 35625, "loss": 0.6605, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.913501272016135e-05, "epoch": 0.4224561403508772, "percentage": 8.45, "elapsed_time": "0:20:14", "remaining_time": "3:39:22"} +{"current_steps": 3020, "total_steps": 35625, "loss": 0.6348, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.9129251055719125e-05, "epoch": 0.423859649122807, "percentage": 8.48, "elapsed_time": "0:20:18", "remaining_time": "3:39:10"} +{"current_steps": 3030, "total_steps": 35625, "loss": 0.6735, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.912347060582793e-05, "epoch": 0.42526315789473684, "percentage": 8.51, "elapsed_time": "0:20:21", "remaining_time": "3:38:58"} +{"current_steps": 3040, "total_steps": 35625, "loss": 0.604, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.911767137498805e-05, "epoch": 0.4266666666666667, "percentage": 8.53, "elapsed_time": "0:20:25", "remaining_time": "3:39:00"} +{"current_steps": 3050, "total_steps": 35625, "loss": 0.659, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.911185336771437e-05, "epoch": 0.4280701754385965, "percentage": 8.56, "elapsed_time": "0:20:29", "remaining_time": "3:38:47"} +{"current_steps": 3060, "total_steps": 35625, "loss": 0.6545, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.910601658853642e-05, "epoch": 0.42947368421052634, "percentage": 8.59, "elapsed_time": "0:20:32", "remaining_time": "3:38:34"} +{"current_steps": 3070, "total_steps": 35625, "loss": 0.6057, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.910016104199833e-05, "epoch": 0.43087719298245614, "percentage": 8.62, "elapsed_time": "0:20:37", "remaining_time": "3:38:39"} +{"current_steps": 3080, "total_steps": 35625, "loss": 0.5503, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.909428673265884e-05, "epoch": 0.432280701754386, "percentage": 8.65, "elapsed_time": "0:20:40", "remaining_time": "3:38:32"} +{"current_steps": 3090, "total_steps": 35625, "loss": 0.6534, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.90883936650913e-05, "epoch": 0.4336842105263158, "percentage": 8.67, "elapsed_time": "0:20:45", "remaining_time": "3:38:28"} +{"current_steps": 3100, "total_steps": 35625, "loss": 0.6696, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.908248184388367e-05, "epoch": 0.43508771929824563, "percentage": 8.7, "elapsed_time": "0:20:48", "remaining_time": "3:38:19"} +{"current_steps": 3110, "total_steps": 35625, "loss": 0.5936, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.90765512736385e-05, "epoch": 0.43649122807017543, "percentage": 8.73, "elapsed_time": "0:20:52", "remaining_time": "3:38:11"} +{"current_steps": 3120, "total_steps": 35625, "loss": 0.6154, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.907060195897296e-05, "epoch": 0.4378947368421053, "percentage": 8.76, "elapsed_time": "0:20:56", "remaining_time": "3:38:08"} +{"current_steps": 3130, "total_steps": 35625, "loss": 0.7975, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.906463390451878e-05, "epoch": 0.4392982456140351, "percentage": 8.79, "elapsed_time": "0:21:00", "remaining_time": "3:38:06"} +{"current_steps": 3140, "total_steps": 35625, "loss": 0.6742, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.9058647114922286e-05, "epoch": 0.44070175438596493, "percentage": 8.81, "elapsed_time": "0:21:03", "remaining_time": "3:37:56"} +{"current_steps": 3150, "total_steps": 35625, "loss": 0.6221, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.9052641594844416e-05, "epoch": 0.4421052631578947, "percentage": 8.84, "elapsed_time": "0:21:09", "remaining_time": "3:38:03"} +{"current_steps": 3160, "total_steps": 35625, "loss": 0.7789, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.9046617348960666e-05, "epoch": 0.4435087719298246, "percentage": 8.87, "elapsed_time": "0:21:13", "remaining_time": "3:38:02"} +{"current_steps": 3170, "total_steps": 35625, "loss": 0.6668, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.904057438196111e-05, "epoch": 0.44491228070175437, "percentage": 8.9, "elapsed_time": "0:21:17", "remaining_time": "3:38:01"} +{"current_steps": 3180, "total_steps": 35625, "loss": 0.6732, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.903451269855043e-05, "epoch": 0.4463157894736842, "percentage": 8.93, "elapsed_time": "0:21:21", "remaining_time": "3:37:56"} +{"current_steps": 3190, "total_steps": 35625, "loss": 0.652, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.9028432303447826e-05, "epoch": 0.447719298245614, "percentage": 8.95, "elapsed_time": "0:21:26", "remaining_time": "3:38:00"} +{"current_steps": 3200, "total_steps": 35625, "loss": 0.7734, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.902233320138711e-05, "epoch": 0.44912280701754387, "percentage": 8.98, "elapsed_time": "0:21:31", "remaining_time": "3:38:03"} +{"current_steps": 3210, "total_steps": 35625, "loss": 0.6524, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.901621539711664e-05, "epoch": 0.45052631578947366, "percentage": 9.01, "elapsed_time": "0:21:35", "remaining_time": "3:37:57"} +{"current_steps": 3220, "total_steps": 35625, "loss": 0.5631, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.901007889539933e-05, "epoch": 0.4519298245614035, "percentage": 9.04, "elapsed_time": "0:21:39", "remaining_time": "3:37:56"} +{"current_steps": 3230, "total_steps": 35625, "loss": 0.6949, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.900392370101266e-05, "epoch": 0.4533333333333333, "percentage": 9.07, "elapsed_time": "0:21:43", "remaining_time": "3:37:51"} +{"current_steps": 3240, "total_steps": 35625, "loss": 0.7445, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.899774981874867e-05, "epoch": 0.45473684210526316, "percentage": 9.09, "elapsed_time": "0:21:46", "remaining_time": "3:37:41"} +{"current_steps": 3250, "total_steps": 35625, "loss": 0.6329, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.8991557253413924e-05, "epoch": 0.45614035087719296, "percentage": 9.12, "elapsed_time": "0:21:49", "remaining_time": "3:37:29"} +{"current_steps": 3260, "total_steps": 35625, "loss": 0.6808, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.8985346009829546e-05, "epoch": 0.4575438596491228, "percentage": 9.15, "elapsed_time": "0:21:53", "remaining_time": "3:37:16"} +{"current_steps": 3270, "total_steps": 35625, "loss": 0.6464, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.8979116092831223e-05, "epoch": 0.4589473684210526, "percentage": 9.18, "elapsed_time": "0:21:58", "remaining_time": "3:37:21"} +{"current_steps": 3280, "total_steps": 35625, "loss": 0.6997, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.897286750726913e-05, "epoch": 0.46035087719298246, "percentage": 9.21, "elapsed_time": "0:22:01", "remaining_time": "3:37:08"} +{"current_steps": 3290, "total_steps": 35625, "loss": 0.642, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.8966600258008024e-05, "epoch": 0.4617543859649123, "percentage": 9.24, "elapsed_time": "0:22:05", "remaining_time": "3:37:02"} +{"current_steps": 3300, "total_steps": 35625, "loss": 0.651, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.896031434992717e-05, "epoch": 0.4631578947368421, "percentage": 9.26, "elapsed_time": "0:22:08", "remaining_time": "3:36:56"} +{"current_steps": 3310, "total_steps": 35625, "loss": 0.6899, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.8954009787920365e-05, "epoch": 0.46456140350877195, "percentage": 9.29, "elapsed_time": "0:22:14", "remaining_time": "3:37:08"} +{"current_steps": 3320, "total_steps": 35625, "loss": 0.7559, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.894768657689592e-05, "epoch": 0.46596491228070175, "percentage": 9.32, "elapsed_time": "0:22:18", "remaining_time": "3:37:00"} +{"current_steps": 3330, "total_steps": 35625, "loss": 0.6473, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.8941344721776675e-05, "epoch": 0.4673684210526316, "percentage": 9.35, "elapsed_time": "0:22:22", "remaining_time": "3:36:59"} +{"current_steps": 3340, "total_steps": 35625, "loss": 0.726, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.893498422749997e-05, "epoch": 0.4687719298245614, "percentage": 9.38, "elapsed_time": "0:22:27", "remaining_time": "3:37:06"} +{"current_steps": 3350, "total_steps": 35625, "loss": 0.6, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.8928605099017696e-05, "epoch": 0.47017543859649125, "percentage": 9.4, "elapsed_time": "0:22:31", "remaining_time": "3:36:57"} +{"current_steps": 3360, "total_steps": 35625, "loss": 0.6202, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.89222073412962e-05, "epoch": 0.47157894736842104, "percentage": 9.43, "elapsed_time": "0:22:36", "remaining_time": "3:37:01"} +{"current_steps": 3370, "total_steps": 35625, "loss": 0.6717, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.8915790959316356e-05, "epoch": 0.4729824561403509, "percentage": 9.46, "elapsed_time": "0:22:38", "remaining_time": "3:36:46"} +{"current_steps": 3380, "total_steps": 35625, "loss": 0.6328, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.890935595807355e-05, "epoch": 0.4743859649122807, "percentage": 9.49, "elapsed_time": "0:22:41", "remaining_time": "3:36:32"} +{"current_steps": 3390, "total_steps": 35625, "loss": 0.7271, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.890290234257764e-05, "epoch": 0.47578947368421054, "percentage": 9.52, "elapsed_time": "0:22:44", "remaining_time": "3:36:18"} +{"current_steps": 3400, "total_steps": 35625, "loss": 0.582, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.889643011785299e-05, "epoch": 0.47719298245614034, "percentage": 9.54, "elapsed_time": "0:22:48", "remaining_time": "3:36:12"} +{"current_steps": 3410, "total_steps": 35625, "loss": 0.6851, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.888993928893846e-05, "epoch": 0.4785964912280702, "percentage": 9.57, "elapsed_time": "0:22:51", "remaining_time": "3:36:00"} +{"current_steps": 3420, "total_steps": 35625, "loss": 0.583, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.888342986088736e-05, "epoch": 0.48, "percentage": 9.6, "elapsed_time": "0:22:56", "remaining_time": "3:35:59"} +{"current_steps": 3430, "total_steps": 35625, "loss": 0.6261, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.887690183876752e-05, "epoch": 0.48140350877192983, "percentage": 9.63, "elapsed_time": "0:22:59", "remaining_time": "3:35:46"} +{"current_steps": 3440, "total_steps": 35625, "loss": 0.6563, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.887035522766122e-05, "epoch": 0.48280701754385963, "percentage": 9.66, "elapsed_time": "0:23:03", "remaining_time": "3:35:42"} +{"current_steps": 3450, "total_steps": 35625, "loss": 0.5178, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.886379003266523e-05, "epoch": 0.4842105263157895, "percentage": 9.68, "elapsed_time": "0:23:06", "remaining_time": "3:35:28"} +{"current_steps": 3460, "total_steps": 35625, "loss": 0.824, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.885720625889078e-05, "epoch": 0.4856140350877193, "percentage": 9.71, "elapsed_time": "0:23:09", "remaining_time": "3:35:16"} +{"current_steps": 3470, "total_steps": 35625, "loss": 0.6821, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.8850603911463556e-05, "epoch": 0.4870175438596491, "percentage": 9.74, "elapsed_time": "0:23:13", "remaining_time": "3:35:10"} +{"current_steps": 3480, "total_steps": 35625, "loss": 0.6955, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.8843982995523704e-05, "epoch": 0.4884210526315789, "percentage": 9.77, "elapsed_time": "0:23:17", "remaining_time": "3:35:11"} +{"current_steps": 3490, "total_steps": 35625, "loss": 0.6447, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.883734351622586e-05, "epoch": 0.4898245614035088, "percentage": 9.8, "elapsed_time": "0:23:21", "remaining_time": "3:35:08"} +{"current_steps": 3500, "total_steps": 35625, "loss": 0.6454, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.8830685478739057e-05, "epoch": 0.49122807017543857, "percentage": 9.82, "elapsed_time": "0:23:26", "remaining_time": "3:35:10"} +{"current_steps": 3510, "total_steps": 35625, "loss": 0.6676, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.8824008888246834e-05, "epoch": 0.4926315789473684, "percentage": 9.85, "elapsed_time": "0:23:30", "remaining_time": "3:35:05"} +{"current_steps": 3520, "total_steps": 35625, "loss": 0.5715, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.8817313749947115e-05, "epoch": 0.49403508771929827, "percentage": 9.88, "elapsed_time": "0:23:33", "remaining_time": "3:34:53"} +{"current_steps": 3530, "total_steps": 35625, "loss": 0.6479, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.881060006905232e-05, "epoch": 0.49543859649122807, "percentage": 9.91, "elapsed_time": "0:23:36", "remaining_time": "3:34:40"} +{"current_steps": 3540, "total_steps": 35625, "loss": 0.6208, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.880386785078925e-05, "epoch": 0.4968421052631579, "percentage": 9.94, "elapsed_time": "0:23:40", "remaining_time": "3:34:34"} +{"current_steps": 3550, "total_steps": 35625, "loss": 0.6503, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.87971171003992e-05, "epoch": 0.4982456140350877, "percentage": 9.96, "elapsed_time": "0:23:43", "remaining_time": "3:34:22"} +{"current_steps": 3560, "total_steps": 35625, "loss": 0.6525, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.879034782313786e-05, "epoch": 0.49964912280701756, "percentage": 9.99, "elapsed_time": "0:23:46", "remaining_time": "3:34:09"} +{"current_steps": 3570, "total_steps": 35625, "loss": 0.633, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.878356002427532e-05, "epoch": 0.5010526315789474, "percentage": 10.02, "elapsed_time": "0:23:50", "remaining_time": "3:34:01"} +{"current_steps": 3580, "total_steps": 35625, "loss": 0.6184, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.877675370909612e-05, "epoch": 0.5024561403508772, "percentage": 10.05, "elapsed_time": "0:23:54", "remaining_time": "3:33:56"} +{"current_steps": 3590, "total_steps": 35625, "loss": 0.64, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.876992888289923e-05, "epoch": 0.503859649122807, "percentage": 10.08, "elapsed_time": "0:23:58", "remaining_time": "3:33:54"} +{"current_steps": 3600, "total_steps": 35625, "loss": 0.6238, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.876308555099799e-05, "epoch": 0.5052631578947369, "percentage": 10.11, "elapsed_time": "0:24:01", "remaining_time": "3:33:45"} +{"current_steps": 3610, "total_steps": 35625, "loss": 0.7433, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.875622371872017e-05, "epoch": 0.5066666666666667, "percentage": 10.13, "elapsed_time": "0:24:05", "remaining_time": "3:33:37"} +{"current_steps": 3620, "total_steps": 35625, "loss": 0.7031, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.874934339140795e-05, "epoch": 0.5080701754385964, "percentage": 10.16, "elapsed_time": "0:24:10", "remaining_time": "3:33:42"} +{"current_steps": 3630, "total_steps": 35625, "loss": 0.5443, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.8742444574417904e-05, "epoch": 0.5094736842105263, "percentage": 10.19, "elapsed_time": "0:24:14", "remaining_time": "3:33:38"} +{"current_steps": 3640, "total_steps": 35625, "loss": 0.6728, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.873552727312099e-05, "epoch": 0.5108771929824562, "percentage": 10.22, "elapsed_time": "0:24:20", "remaining_time": "3:33:50"} +{"current_steps": 3650, "total_steps": 35625, "loss": 0.6321, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.872859149290256e-05, "epoch": 0.512280701754386, "percentage": 10.25, "elapsed_time": "0:24:23", "remaining_time": "3:33:44"} +{"current_steps": 3660, "total_steps": 35625, "loss": 0.6301, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.872163723916237e-05, "epoch": 0.5136842105263157, "percentage": 10.27, "elapsed_time": "0:24:29", "remaining_time": "3:33:50"} +{"current_steps": 3670, "total_steps": 35625, "loss": 0.6991, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.871466451731453e-05, "epoch": 0.5150877192982456, "percentage": 10.3, "elapsed_time": "0:24:32", "remaining_time": "3:33:37"} +{"current_steps": 3680, "total_steps": 35625, "loss": 0.5503, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.870767333278755e-05, "epoch": 0.5164912280701754, "percentage": 10.33, "elapsed_time": "0:24:37", "remaining_time": "3:33:46"} +{"current_steps": 3690, "total_steps": 35625, "loss": 0.6342, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.87006636910243e-05, "epoch": 0.5178947368421053, "percentage": 10.36, "elapsed_time": "0:24:40", "remaining_time": "3:33:35"} +{"current_steps": 3700, "total_steps": 35625, "loss": 0.5933, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.8693635597482045e-05, "epoch": 0.519298245614035, "percentage": 10.39, "elapsed_time": "0:24:43", "remaining_time": "3:33:24"} +{"current_steps": 3710, "total_steps": 35625, "loss": 0.5878, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.868658905763238e-05, "epoch": 0.5207017543859649, "percentage": 10.41, "elapsed_time": "0:24:48", "remaining_time": "3:33:28"} +{"current_steps": 3720, "total_steps": 35625, "loss": 0.5478, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.8679524076961284e-05, "epoch": 0.5221052631578947, "percentage": 10.44, "elapsed_time": "0:24:52", "remaining_time": "3:33:17"} +{"current_steps": 3730, "total_steps": 35625, "loss": 0.6024, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.867244066096909e-05, "epoch": 0.5235087719298246, "percentage": 10.47, "elapsed_time": "0:24:56", "remaining_time": "3:33:13"} +{"current_steps": 3740, "total_steps": 35625, "loss": 0.6106, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.866533881517046e-05, "epoch": 0.5249122807017544, "percentage": 10.5, "elapsed_time": "0:25:00", "remaining_time": "3:33:11"} +{"current_steps": 3750, "total_steps": 35625, "loss": 0.602, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.865821854509445e-05, "epoch": 0.5263157894736842, "percentage": 10.53, "elapsed_time": "0:25:05", "remaining_time": "3:33:17"} +{"current_steps": 3760, "total_steps": 35625, "loss": 0.7147, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.865107985628442e-05, "epoch": 0.527719298245614, "percentage": 10.55, "elapsed_time": "0:25:10", "remaining_time": "3:33:18"} +{"current_steps": 3770, "total_steps": 35625, "loss": 0.6561, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.86439227542981e-05, "epoch": 0.5291228070175439, "percentage": 10.58, "elapsed_time": "0:25:13", "remaining_time": "3:33:06"} +{"current_steps": 3780, "total_steps": 35625, "loss": 0.7062, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.863674724470751e-05, "epoch": 0.5305263157894737, "percentage": 10.61, "elapsed_time": "0:25:17", "remaining_time": "3:33:06"} +{"current_steps": 3790, "total_steps": 35625, "loss": 0.7019, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.862955333309905e-05, "epoch": 0.5319298245614035, "percentage": 10.64, "elapsed_time": "0:25:22", "remaining_time": "3:33:08"} +{"current_steps": 3800, "total_steps": 35625, "loss": 0.5702, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.8622341025073425e-05, "epoch": 0.5333333333333333, "percentage": 10.67, "elapsed_time": "0:25:26", "remaining_time": "3:33:01"} +{"current_steps": 3810, "total_steps": 35625, "loss": 0.6956, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.861511032624567e-05, "epoch": 0.5347368421052632, "percentage": 10.69, "elapsed_time": "0:25:29", "remaining_time": "3:32:52"} +{"current_steps": 3820, "total_steps": 35625, "loss": 0.6266, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.860786124224512e-05, "epoch": 0.536140350877193, "percentage": 10.72, "elapsed_time": "0:25:33", "remaining_time": "3:32:44"} +{"current_steps": 3830, "total_steps": 35625, "loss": 0.6758, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.860059377871544e-05, "epoch": 0.5375438596491228, "percentage": 10.75, "elapsed_time": "0:25:36", "remaining_time": "3:32:38"} +{"current_steps": 3840, "total_steps": 35625, "loss": 0.6559, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.85933079413146e-05, "epoch": 0.5389473684210526, "percentage": 10.78, "elapsed_time": "0:25:40", "remaining_time": "3:32:35"} +{"current_steps": 3850, "total_steps": 35625, "loss": 0.6052, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.858600373571487e-05, "epoch": 0.5403508771929825, "percentage": 10.81, "elapsed_time": "0:25:44", "remaining_time": "3:32:23"} +{"current_steps": 3860, "total_steps": 35625, "loss": 0.6119, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.8578681167602834e-05, "epoch": 0.5417543859649123, "percentage": 10.84, "elapsed_time": "0:25:47", "remaining_time": "3:32:16"} +{"current_steps": 3870, "total_steps": 35625, "loss": 0.5872, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.8571340242679354e-05, "epoch": 0.5431578947368421, "percentage": 10.86, "elapsed_time": "0:25:50", "remaining_time": "3:32:04"} +{"current_steps": 3880, "total_steps": 35625, "loss": 0.7302, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.856398096665959e-05, "epoch": 0.5445614035087719, "percentage": 10.89, "elapsed_time": "0:25:53", "remaining_time": "3:31:52"} +{"current_steps": 3890, "total_steps": 35625, "loss": 0.6889, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.8556603345273e-05, "epoch": 0.5459649122807018, "percentage": 10.92, "elapsed_time": "0:25:56", "remaining_time": "3:31:41"} +{"current_steps": 3900, "total_steps": 35625, "loss": 0.6045, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.8549207384263305e-05, "epoch": 0.5473684210526316, "percentage": 10.95, "elapsed_time": "0:26:00", "remaining_time": "3:31:34"} +{"current_steps": 3910, "total_steps": 35625, "loss": 0.743, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.854179308938852e-05, "epoch": 0.5487719298245614, "percentage": 10.98, "elapsed_time": "0:26:03", "remaining_time": "3:31:24"} +{"current_steps": 3920, "total_steps": 35625, "loss": 0.6416, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.8534360466420926e-05, "epoch": 0.5501754385964912, "percentage": 11.0, "elapsed_time": "0:26:06", "remaining_time": "3:31:13"} +{"current_steps": 3930, "total_steps": 35625, "loss": 0.5975, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.852690952114708e-05, "epoch": 0.5515789473684211, "percentage": 11.03, "elapsed_time": "0:26:10", "remaining_time": "3:31:06"} +{"current_steps": 3940, "total_steps": 35625, "loss": 0.7975, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.851944025936779e-05, "epoch": 0.5529824561403509, "percentage": 11.06, "elapsed_time": "0:26:13", "remaining_time": "3:30:56"} +{"current_steps": 3950, "total_steps": 35625, "loss": 0.6836, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.851195268689813e-05, "epoch": 0.5543859649122806, "percentage": 11.09, "elapsed_time": "0:26:17", "remaining_time": "3:30:47"} +{"current_steps": 3960, "total_steps": 35625, "loss": 0.5265, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.850444680956745e-05, "epoch": 0.5557894736842105, "percentage": 11.12, "elapsed_time": "0:26:20", "remaining_time": "3:30:40"} +{"current_steps": 3970, "total_steps": 35625, "loss": 0.687, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.8496922633219314e-05, "epoch": 0.5571929824561404, "percentage": 11.14, "elapsed_time": "0:26:24", "remaining_time": "3:30:36"} +{"current_steps": 3980, "total_steps": 35625, "loss": 0.5644, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.8489380163711556e-05, "epoch": 0.5585964912280702, "percentage": 11.17, "elapsed_time": "0:26:29", "remaining_time": "3:30:37"} +{"current_steps": 3990, "total_steps": 35625, "loss": 0.6013, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.848181940691625e-05, "epoch": 0.56, "percentage": 11.2, "elapsed_time": "0:26:33", "remaining_time": "3:30:31"} +{"current_steps": 4000, "total_steps": 35625, "loss": 0.724, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.8474240368719703e-05, "epoch": 0.5614035087719298, "percentage": 11.23, "elapsed_time": "0:26:38", "remaining_time": "3:30:35"} +{"current_steps": 4000, "total_steps": 35625, "loss": null, "eval_loss": 0.6521075963973999, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": null, "epoch": 0.5614035087719298, "percentage": 11.23, "elapsed_time": "0:26:38", "remaining_time": "3:30:35"} +{"current_steps": 4010, "total_steps": 35625, "loss": 0.6668, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.846664305502245e-05, "epoch": 0.5628070175438596, "percentage": 11.26, "elapsed_time": "0:27:26", "remaining_time": "3:36:20"} +{"current_steps": 4020, "total_steps": 35625, "loss": 0.6898, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.8459027471739284e-05, "epoch": 0.5642105263157895, "percentage": 11.28, "elapsed_time": "0:27:29", "remaining_time": "3:36:08"} +{"current_steps": 4030, "total_steps": 35625, "loss": 0.6639, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.8451393624799165e-05, "epoch": 0.5656140350877193, "percentage": 11.31, "elapsed_time": "0:27:32", "remaining_time": "3:35:58"} +{"current_steps": 4040, "total_steps": 35625, "loss": 0.7336, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.844374152014532e-05, "epoch": 0.5670175438596491, "percentage": 11.34, "elapsed_time": "0:27:35", "remaining_time": "3:35:45"} +{"current_steps": 4050, "total_steps": 35625, "loss": 0.6233, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.843607116373518e-05, "epoch": 0.5684210526315789, "percentage": 11.37, "elapsed_time": "0:27:40", "remaining_time": "3:35:47"} +{"current_steps": 4060, "total_steps": 35625, "loss": 0.6178, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.8428382561540366e-05, "epoch": 0.5698245614035088, "percentage": 11.4, "elapsed_time": "0:27:44", "remaining_time": "3:35:43"} +{"current_steps": 4070, "total_steps": 35625, "loss": 0.6121, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.8420675719546723e-05, "epoch": 0.5712280701754386, "percentage": 11.42, "elapsed_time": "0:27:48", "remaining_time": "3:35:37"} +{"current_steps": 4080, "total_steps": 35625, "loss": 0.6225, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.8412950643754305e-05, "epoch": 0.5726315789473684, "percentage": 11.45, "elapsed_time": "0:27:51", "remaining_time": "3:35:24"} +{"current_steps": 4090, "total_steps": 35625, "loss": 0.6502, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.840520734017734e-05, "epoch": 0.5740350877192982, "percentage": 11.48, "elapsed_time": "0:27:55", "remaining_time": "3:35:17"} +{"current_steps": 4100, "total_steps": 35625, "loss": 0.6799, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.839744581484425e-05, "epoch": 0.5754385964912281, "percentage": 11.51, "elapsed_time": "0:27:58", "remaining_time": "3:35:07"} +{"current_steps": 4110, "total_steps": 35625, "loss": 0.7671, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.8389666073797646e-05, "epoch": 0.5768421052631579, "percentage": 11.54, "elapsed_time": "0:28:01", "remaining_time": "3:34:56"} +{"current_steps": 4120, "total_steps": 35625, "loss": 0.6289, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.8381868123094335e-05, "epoch": 0.5782456140350877, "percentage": 11.56, "elapsed_time": "0:28:07", "remaining_time": "3:35:01"} +{"current_steps": 4130, "total_steps": 35625, "loss": 0.6621, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.837405196880529e-05, "epoch": 0.5796491228070175, "percentage": 11.59, "elapsed_time": "0:28:11", "remaining_time": "3:35:02"} +{"current_steps": 4140, "total_steps": 35625, "loss": 0.601, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.836621761701564e-05, "epoch": 0.5810526315789474, "percentage": 11.62, "elapsed_time": "0:28:15", "remaining_time": "3:34:57"} +{"current_steps": 4150, "total_steps": 35625, "loss": 0.6818, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.835836507382471e-05, "epoch": 0.5824561403508772, "percentage": 11.65, "elapsed_time": "0:28:19", "remaining_time": "3:34:50"} +{"current_steps": 4160, "total_steps": 35625, "loss": 0.6688, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.835049434534596e-05, "epoch": 0.583859649122807, "percentage": 11.68, "elapsed_time": "0:28:24", "remaining_time": "3:34:50"} +{"current_steps": 4170, "total_steps": 35625, "loss": 0.7652, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.8342605437707034e-05, "epoch": 0.5852631578947368, "percentage": 11.71, "elapsed_time": "0:28:27", "remaining_time": "3:34:43"} +{"current_steps": 4180, "total_steps": 35625, "loss": 0.5381, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.8334698357049715e-05, "epoch": 0.5866666666666667, "percentage": 11.73, "elapsed_time": "0:28:30", "remaining_time": "3:34:31"} +{"current_steps": 4190, "total_steps": 35625, "loss": 0.6786, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.832677310952993e-05, "epoch": 0.5880701754385965, "percentage": 11.76, "elapsed_time": "0:28:34", "remaining_time": "3:34:21"} +{"current_steps": 4200, "total_steps": 35625, "loss": 0.629, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.831882970131777e-05, "epoch": 0.5894736842105263, "percentage": 11.79, "elapsed_time": "0:28:39", "remaining_time": "3:34:23"} +{"current_steps": 4210, "total_steps": 35625, "loss": 0.6021, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.831086813859743e-05, "epoch": 0.5908771929824561, "percentage": 11.82, "elapsed_time": "0:28:42", "remaining_time": "3:34:15"} +{"current_steps": 4220, "total_steps": 35625, "loss": 0.5706, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.830288842756728e-05, "epoch": 0.592280701754386, "percentage": 11.85, "elapsed_time": "0:28:45", "remaining_time": "3:34:04"} +{"current_steps": 4230, "total_steps": 35625, "loss": 0.6716, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.8294890574439784e-05, "epoch": 0.5936842105263158, "percentage": 11.87, "elapsed_time": "0:28:48", "remaining_time": "3:33:51"} +{"current_steps": 4240, "total_steps": 35625, "loss": 0.6315, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.828687458544155e-05, "epoch": 0.5950877192982457, "percentage": 11.9, "elapsed_time": "0:28:52", "remaining_time": "3:33:44"} +{"current_steps": 4250, "total_steps": 35625, "loss": 0.5633, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.82788404668133e-05, "epoch": 0.5964912280701754, "percentage": 11.93, "elapsed_time": "0:28:56", "remaining_time": "3:33:40"} +{"current_steps": 4260, "total_steps": 35625, "loss": 0.6747, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.827078822480987e-05, "epoch": 0.5978947368421053, "percentage": 11.96, "elapsed_time": "0:29:00", "remaining_time": "3:33:31"} +{"current_steps": 4270, "total_steps": 35625, "loss": 0.6555, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.826271786570021e-05, "epoch": 0.5992982456140351, "percentage": 11.99, "elapsed_time": "0:29:04", "remaining_time": "3:33:28"} +{"current_steps": 4280, "total_steps": 35625, "loss": 0.5987, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.825462939576737e-05, "epoch": 0.600701754385965, "percentage": 12.01, "elapsed_time": "0:29:07", "remaining_time": "3:33:17"} +{"current_steps": 4290, "total_steps": 35625, "loss": 0.6753, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.8246522821308495e-05, "epoch": 0.6021052631578947, "percentage": 12.04, "elapsed_time": "0:29:13", "remaining_time": "3:33:25"} +{"current_steps": 4300, "total_steps": 35625, "loss": 0.5856, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.823839814863484e-05, "epoch": 0.6035087719298246, "percentage": 12.07, "elapsed_time": "0:29:17", "remaining_time": "3:33:21"} +{"current_steps": 4310, "total_steps": 35625, "loss": 0.6204, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.823025538407173e-05, "epoch": 0.6049122807017544, "percentage": 12.1, "elapsed_time": "0:29:20", "remaining_time": "3:33:12"} +{"current_steps": 4320, "total_steps": 35625, "loss": 0.5866, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.82220945339586e-05, "epoch": 0.6063157894736843, "percentage": 12.13, "elapsed_time": "0:29:24", "remaining_time": "3:33:07"} +{"current_steps": 4330, "total_steps": 35625, "loss": 0.7384, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.8213915604648944e-05, "epoch": 0.607719298245614, "percentage": 12.15, "elapsed_time": "0:29:27", "remaining_time": "3:32:55"} +{"current_steps": 4340, "total_steps": 35625, "loss": 0.7113, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.820571860251034e-05, "epoch": 0.6091228070175438, "percentage": 12.18, "elapsed_time": "0:29:30", "remaining_time": "3:32:44"} +{"current_steps": 4350, "total_steps": 35625, "loss": 0.6544, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.819750353392443e-05, "epoch": 0.6105263157894737, "percentage": 12.21, "elapsed_time": "0:29:34", "remaining_time": "3:32:34"} +{"current_steps": 4360, "total_steps": 35625, "loss": 0.6317, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.818927040528693e-05, "epoch": 0.6119298245614035, "percentage": 12.24, "elapsed_time": "0:29:37", "remaining_time": "3:32:25"} +{"current_steps": 4370, "total_steps": 35625, "loss": 0.5756, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.818101922300762e-05, "epoch": 0.6133333333333333, "percentage": 12.27, "elapsed_time": "0:29:40", "remaining_time": "3:32:16"} +{"current_steps": 4380, "total_steps": 35625, "loss": 0.687, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.8172749993510315e-05, "epoch": 0.6147368421052631, "percentage": 12.29, "elapsed_time": "0:29:44", "remaining_time": "3:32:13"} +{"current_steps": 4390, "total_steps": 35625, "loss": 0.6573, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.81644627232329e-05, "epoch": 0.616140350877193, "percentage": 12.32, "elapsed_time": "0:29:48", "remaining_time": "3:32:05"} +{"current_steps": 4400, "total_steps": 35625, "loss": 0.6787, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.81561574186273e-05, "epoch": 0.6175438596491228, "percentage": 12.35, "elapsed_time": "0:29:51", "remaining_time": "3:31:54"} +{"current_steps": 4410, "total_steps": 35625, "loss": 0.5503, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.814783408615948e-05, "epoch": 0.6189473684210526, "percentage": 12.38, "elapsed_time": "0:29:55", "remaining_time": "3:31:51"} +{"current_steps": 4420, "total_steps": 35625, "loss": 0.6495, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.813949273230944e-05, "epoch": 0.6203508771929824, "percentage": 12.41, "elapsed_time": "0:30:00", "remaining_time": "3:31:54"} +{"current_steps": 4430, "total_steps": 35625, "loss": 0.5845, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.8131133363571214e-05, "epoch": 0.6217543859649123, "percentage": 12.44, "elapsed_time": "0:30:04", "remaining_time": "3:31:47"} +{"current_steps": 4440, "total_steps": 35625, "loss": 0.6093, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.8122755986452845e-05, "epoch": 0.6231578947368421, "percentage": 12.46, "elapsed_time": "0:30:07", "remaining_time": "3:31:37"} +{"current_steps": 4450, "total_steps": 35625, "loss": 0.6465, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.8114360607476416e-05, "epoch": 0.624561403508772, "percentage": 12.49, "elapsed_time": "0:30:12", "remaining_time": "3:31:36"} +{"current_steps": 4460, "total_steps": 35625, "loss": 0.6228, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.810594723317801e-05, "epoch": 0.6259649122807017, "percentage": 12.52, "elapsed_time": "0:30:15", "remaining_time": "3:31:26"} +{"current_steps": 4470, "total_steps": 35625, "loss": 0.657, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.809751587010774e-05, "epoch": 0.6273684210526316, "percentage": 12.55, "elapsed_time": "0:30:18", "remaining_time": "3:31:15"} +{"current_steps": 4480, "total_steps": 35625, "loss": 0.6068, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.80890665248297e-05, "epoch": 0.6287719298245614, "percentage": 12.58, "elapsed_time": "0:30:22", "remaining_time": "3:31:07"} +{"current_steps": 4490, "total_steps": 35625, "loss": 0.7177, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.808059920392201e-05, "epoch": 0.6301754385964913, "percentage": 12.6, "elapsed_time": "0:30:26", "remaining_time": "3:31:03"} +{"current_steps": 4500, "total_steps": 35625, "loss": 0.6333, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.807211391397674e-05, "epoch": 0.631578947368421, "percentage": 12.63, "elapsed_time": "0:30:29", "remaining_time": "3:30:52"} +{"current_steps": 4510, "total_steps": 35625, "loss": 0.6396, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.806361066160001e-05, "epoch": 0.6329824561403509, "percentage": 12.66, "elapsed_time": "0:30:35", "remaining_time": "3:31:03"} +{"current_steps": 4520, "total_steps": 35625, "loss": 0.622, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.8055089453411875e-05, "epoch": 0.6343859649122807, "percentage": 12.69, "elapsed_time": "0:30:38", "remaining_time": "3:30:53"} +{"current_steps": 4530, "total_steps": 35625, "loss": 0.6551, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.80465502960464e-05, "epoch": 0.6357894736842106, "percentage": 12.72, "elapsed_time": "0:30:43", "remaining_time": "3:30:54"} +{"current_steps": 4540, "total_steps": 35625, "loss": 0.6874, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.80379931961516e-05, "epoch": 0.6371929824561403, "percentage": 12.74, "elapsed_time": "0:30:46", "remaining_time": "3:30:44"} +{"current_steps": 4550, "total_steps": 35625, "loss": 0.5982, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.8029418160389484e-05, "epoch": 0.6385964912280702, "percentage": 12.77, "elapsed_time": "0:30:49", "remaining_time": "3:30:33"} +{"current_steps": 4560, "total_steps": 35625, "loss": 0.5541, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.8020825195435994e-05, "epoch": 0.64, "percentage": 12.8, "elapsed_time": "0:30:53", "remaining_time": "3:30:25"} +{"current_steps": 4570, "total_steps": 35625, "loss": 0.7297, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.8012214307981064e-05, "epoch": 0.6414035087719299, "percentage": 12.83, "elapsed_time": "0:30:58", "remaining_time": "3:30:27"} +{"current_steps": 4580, "total_steps": 35625, "loss": 0.7694, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.800358550472855e-05, "epoch": 0.6428070175438596, "percentage": 12.86, "elapsed_time": "0:31:01", "remaining_time": "3:30:18"} +{"current_steps": 4590, "total_steps": 35625, "loss": 0.6194, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.799493879239628e-05, "epoch": 0.6442105263157895, "percentage": 12.88, "elapsed_time": "0:31:04", "remaining_time": "3:30:08"} +{"current_steps": 4600, "total_steps": 35625, "loss": 0.6358, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.7986274177716024e-05, "epoch": 0.6456140350877193, "percentage": 12.91, "elapsed_time": "0:31:08", "remaining_time": "3:30:01"} +{"current_steps": 4610, "total_steps": 35625, "loss": 0.6828, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.797759166743346e-05, "epoch": 0.6470175438596492, "percentage": 12.94, "elapsed_time": "0:31:12", "remaining_time": "3:29:57"} +{"current_steps": 4620, "total_steps": 35625, "loss": 0.7163, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.7968891268308246e-05, "epoch": 0.6484210526315789, "percentage": 12.97, "elapsed_time": "0:31:16", "remaining_time": "3:29:52"} +{"current_steps": 4630, "total_steps": 35625, "loss": 0.5935, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.796017298711391e-05, "epoch": 0.6498245614035087, "percentage": 13.0, "elapsed_time": "0:31:19", "remaining_time": "3:29:43"} +{"current_steps": 4640, "total_steps": 35625, "loss": 0.5696, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.795143683063797e-05, "epoch": 0.6512280701754386, "percentage": 13.02, "elapsed_time": "0:31:22", "remaining_time": "3:29:33"} +{"current_steps": 4650, "total_steps": 35625, "loss": 0.5665, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.7942682805681797e-05, "epoch": 0.6526315789473685, "percentage": 13.05, "elapsed_time": "0:31:25", "remaining_time": "3:29:21"} +{"current_steps": 4660, "total_steps": 35625, "loss": 0.6242, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.79339109190607e-05, "epoch": 0.6540350877192982, "percentage": 13.08, "elapsed_time": "0:31:29", "remaining_time": "3:29:17"} +{"current_steps": 4670, "total_steps": 35625, "loss": 0.6259, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.792512117760391e-05, "epoch": 0.655438596491228, "percentage": 13.11, "elapsed_time": "0:31:32", "remaining_time": "3:29:06"} +{"current_steps": 4680, "total_steps": 35625, "loss": 0.6757, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.7916313588154514e-05, "epoch": 0.6568421052631579, "percentage": 13.14, "elapsed_time": "0:31:36", "remaining_time": "3:28:58"} +{"current_steps": 4690, "total_steps": 35625, "loss": 0.6324, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.790748815756954e-05, "epoch": 0.6582456140350877, "percentage": 13.16, "elapsed_time": "0:31:40", "remaining_time": "3:28:54"} +{"current_steps": 4700, "total_steps": 35625, "loss": 0.5834, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.78986448927199e-05, "epoch": 0.6596491228070176, "percentage": 13.19, "elapsed_time": "0:31:44", "remaining_time": "3:28:51"} +{"current_steps": 4710, "total_steps": 35625, "loss": 0.6985, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.788978380049036e-05, "epoch": 0.6610526315789473, "percentage": 13.22, "elapsed_time": "0:31:47", "remaining_time": "3:28:41"} +{"current_steps": 4720, "total_steps": 35625, "loss": 0.6595, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.78809048877796e-05, "epoch": 0.6624561403508772, "percentage": 13.25, "elapsed_time": "0:31:51", "remaining_time": "3:28:38"} +{"current_steps": 4730, "total_steps": 35625, "loss": 0.7508, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.787200816150014e-05, "epoch": 0.663859649122807, "percentage": 13.28, "elapsed_time": "0:31:55", "remaining_time": "3:28:34"} +{"current_steps": 4740, "total_steps": 35625, "loss": 0.6452, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.786309362857839e-05, "epoch": 0.6652631578947369, "percentage": 13.31, "elapsed_time": "0:31:59", "remaining_time": "3:28:29"} +{"current_steps": 4750, "total_steps": 35625, "loss": 0.6171, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.785416129595463e-05, "epoch": 0.6666666666666666, "percentage": 13.33, "elapsed_time": "0:32:03", "remaining_time": "3:28:25"} +{"current_steps": 4760, "total_steps": 35625, "loss": 0.654, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.784521117058298e-05, "epoch": 0.6680701754385965, "percentage": 13.36, "elapsed_time": "0:32:08", "remaining_time": "3:28:25"} +{"current_steps": 4770, "total_steps": 35625, "loss": 0.6674, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.7836243259431425e-05, "epoch": 0.6694736842105263, "percentage": 13.39, "elapsed_time": "0:32:12", "remaining_time": "3:28:17"} +{"current_steps": 4780, "total_steps": 35625, "loss": 0.6319, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.7827257569481776e-05, "epoch": 0.6708771929824562, "percentage": 13.42, "elapsed_time": "0:32:15", "remaining_time": "3:28:12"} +{"current_steps": 4790, "total_steps": 35625, "loss": 0.5856, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.781825410772972e-05, "epoch": 0.6722807017543859, "percentage": 13.45, "elapsed_time": "0:32:19", "remaining_time": "3:28:07"} +{"current_steps": 4800, "total_steps": 35625, "loss": 0.5919, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.780923288118475e-05, "epoch": 0.6736842105263158, "percentage": 13.47, "elapsed_time": "0:32:23", "remaining_time": "3:28:03"} +{"current_steps": 4810, "total_steps": 35625, "loss": 0.6192, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.78001938968702e-05, "epoch": 0.6750877192982456, "percentage": 13.5, "elapsed_time": "0:32:27", "remaining_time": "3:27:56"} +{"current_steps": 4820, "total_steps": 35625, "loss": 0.6628, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.779113716182323e-05, "epoch": 0.6764912280701755, "percentage": 13.53, "elapsed_time": "0:32:30", "remaining_time": "3:27:48"} +{"current_steps": 4830, "total_steps": 35625, "loss": 0.6451, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.778206268309482e-05, "epoch": 0.6778947368421052, "percentage": 13.56, "elapsed_time": "0:32:35", "remaining_time": "3:27:50"} +{"current_steps": 4840, "total_steps": 35625, "loss": 0.6823, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.777297046774977e-05, "epoch": 0.6792982456140351, "percentage": 13.59, "elapsed_time": "0:32:41", "remaining_time": "3:27:53"} +{"current_steps": 4850, "total_steps": 35625, "loss": 0.6916, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.7763860522866665e-05, "epoch": 0.6807017543859649, "percentage": 13.61, "elapsed_time": "0:32:44", "remaining_time": "3:27:48"} +{"current_steps": 4860, "total_steps": 35625, "loss": 0.5936, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.775473285553792e-05, "epoch": 0.6821052631578948, "percentage": 13.64, "elapsed_time": "0:32:48", "remaining_time": "3:27:40"} +{"current_steps": 4870, "total_steps": 35625, "loss": 0.7202, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.774558747286973e-05, "epoch": 0.6835087719298245, "percentage": 13.67, "elapsed_time": "0:32:51", "remaining_time": "3:27:32"} +{"current_steps": 4880, "total_steps": 35625, "loss": 0.6405, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.77364243819821e-05, "epoch": 0.6849122807017544, "percentage": 13.7, "elapsed_time": "0:32:55", "remaining_time": "3:27:23"} +{"current_steps": 4890, "total_steps": 35625, "loss": 0.6704, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.7727243590008806e-05, "epoch": 0.6863157894736842, "percentage": 13.73, "elapsed_time": "0:32:59", "remaining_time": "3:27:18"} +{"current_steps": 4900, "total_steps": 35625, "loss": 0.6304, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.771804510409741e-05, "epoch": 0.6877192982456141, "percentage": 13.75, "elapsed_time": "0:33:02", "remaining_time": "3:27:13"} +{"current_steps": 4910, "total_steps": 35625, "loss": 0.6645, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.7708828931409236e-05, "epoch": 0.6891228070175439, "percentage": 13.78, "elapsed_time": "0:33:06", "remaining_time": "3:27:09"} +{"current_steps": 4920, "total_steps": 35625, "loss": 0.7018, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.769959507911941e-05, "epoch": 0.6905263157894737, "percentage": 13.81, "elapsed_time": "0:33:10", "remaining_time": "3:26:59"} +{"current_steps": 4930, "total_steps": 35625, "loss": 0.5191, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.769034355441678e-05, "epoch": 0.6919298245614035, "percentage": 13.84, "elapsed_time": "0:33:13", "remaining_time": "3:26:51"} +{"current_steps": 4940, "total_steps": 35625, "loss": 0.6723, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.7681074364503995e-05, "epoch": 0.6933333333333334, "percentage": 13.87, "elapsed_time": "0:33:16", "remaining_time": "3:26:41"} +{"current_steps": 4950, "total_steps": 35625, "loss": 0.7069, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.767178751659743e-05, "epoch": 0.6947368421052632, "percentage": 13.89, "elapsed_time": "0:33:19", "remaining_time": "3:26:30"} +{"current_steps": 4960, "total_steps": 35625, "loss": 0.6333, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.7662483017927215e-05, "epoch": 0.696140350877193, "percentage": 13.92, "elapsed_time": "0:33:22", "remaining_time": "3:26:21"} +{"current_steps": 4970, "total_steps": 35625, "loss": 0.7116, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.765316087573722e-05, "epoch": 0.6975438596491228, "percentage": 13.95, "elapsed_time": "0:33:26", "remaining_time": "3:26:18"} +{"current_steps": 4980, "total_steps": 35625, "loss": 0.5517, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.7643821097285044e-05, "epoch": 0.6989473684210527, "percentage": 13.98, "elapsed_time": "0:33:31", "remaining_time": "3:26:19"} +{"current_steps": 4990, "total_steps": 35625, "loss": 0.6856, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.763446368984205e-05, "epoch": 0.7003508771929825, "percentage": 14.01, "elapsed_time": "0:33:36", "remaining_time": "3:26:19"} +{"current_steps": 5000, "total_steps": 35625, "loss": 0.6119, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.762508866069327e-05, "epoch": 0.7017543859649122, "percentage": 14.04, "elapsed_time": "0:33:39", "remaining_time": "3:26:10"} +{"current_steps": 5010, "total_steps": 35625, "loss": 0.5645, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.7615696017137504e-05, "epoch": 0.7031578947368421, "percentage": 14.06, "elapsed_time": "0:33:43", "remaining_time": "3:26:08"} +{"current_steps": 5020, "total_steps": 35625, "loss": 0.6506, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.760628576648723e-05, "epoch": 0.7045614035087719, "percentage": 14.09, "elapsed_time": "0:33:47", "remaining_time": "3:25:58"} +{"current_steps": 5030, "total_steps": 35625, "loss": 0.6092, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.759685791606868e-05, "epoch": 0.7059649122807018, "percentage": 14.12, "elapsed_time": "0:33:50", "remaining_time": "3:25:52"} +{"current_steps": 5040, "total_steps": 35625, "loss": 0.7659, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.758741247322174e-05, "epoch": 0.7073684210526315, "percentage": 14.15, "elapsed_time": "0:33:55", "remaining_time": "3:25:50"} +{"current_steps": 5050, "total_steps": 35625, "loss": 0.5774, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.7577949445300004e-05, "epoch": 0.7087719298245614, "percentage": 14.18, "elapsed_time": "0:33:59", "remaining_time": "3:25:45"} +{"current_steps": 5060, "total_steps": 35625, "loss": 0.6234, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.756846883967077e-05, "epoch": 0.7101754385964912, "percentage": 14.2, "elapsed_time": "0:34:04", "remaining_time": "3:25:50"} +{"current_steps": 5070, "total_steps": 35625, "loss": 0.6456, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.755897066371502e-05, "epoch": 0.7115789473684211, "percentage": 14.23, "elapsed_time": "0:34:07", "remaining_time": "3:25:42"} +{"current_steps": 5080, "total_steps": 35625, "loss": 0.54, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.754945492482741e-05, "epoch": 0.7129824561403508, "percentage": 14.26, "elapsed_time": "0:34:11", "remaining_time": "3:25:33"} +{"current_steps": 5090, "total_steps": 35625, "loss": 0.5695, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.7539921630416264e-05, "epoch": 0.7143859649122807, "percentage": 14.29, "elapsed_time": "0:34:14", "remaining_time": "3:25:25"} +{"current_steps": 5100, "total_steps": 35625, "loss": 0.6748, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.7530370787903576e-05, "epoch": 0.7157894736842105, "percentage": 14.32, "elapsed_time": "0:34:17", "remaining_time": "3:25:17"} +{"current_steps": 5110, "total_steps": 35625, "loss": 0.595, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.7520802404725007e-05, "epoch": 0.7171929824561404, "percentage": 14.34, "elapsed_time": "0:34:21", "remaining_time": "3:25:13"} +{"current_steps": 5120, "total_steps": 35625, "loss": 0.641, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.751121648832987e-05, "epoch": 0.7185964912280701, "percentage": 14.37, "elapsed_time": "0:34:26", "remaining_time": "3:25:10"} +{"current_steps": 5130, "total_steps": 35625, "loss": 0.6345, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.750161304618114e-05, "epoch": 0.72, "percentage": 14.4, "elapsed_time": "0:34:29", "remaining_time": "3:25:03"} +{"current_steps": 5140, "total_steps": 35625, "loss": 0.5997, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.749199208575541e-05, "epoch": 0.7214035087719298, "percentage": 14.43, "elapsed_time": "0:34:33", "remaining_time": "3:25:00"} +{"current_steps": 5150, "total_steps": 35625, "loss": 0.6529, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.748235361454293e-05, "epoch": 0.7228070175438597, "percentage": 14.46, "elapsed_time": "0:34:37", "remaining_time": "3:24:51"} +{"current_steps": 5160, "total_steps": 35625, "loss": 0.5668, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.7472697640047594e-05, "epoch": 0.7242105263157895, "percentage": 14.48, "elapsed_time": "0:34:40", "remaining_time": "3:24:45"} +{"current_steps": 5170, "total_steps": 35625, "loss": 0.6433, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.7463024169786895e-05, "epoch": 0.7256140350877193, "percentage": 14.51, "elapsed_time": "0:34:45", "remaining_time": "3:24:42"} +{"current_steps": 5180, "total_steps": 35625, "loss": 0.6749, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.745333321129197e-05, "epoch": 0.7270175438596491, "percentage": 14.54, "elapsed_time": "0:34:48", "remaining_time": "3:24:37"} +{"current_steps": 5190, "total_steps": 35625, "loss": 0.7041, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.744362477210755e-05, "epoch": 0.728421052631579, "percentage": 14.57, "elapsed_time": "0:34:52", "remaining_time": "3:24:31"} +{"current_steps": 5200, "total_steps": 35625, "loss": 0.5598, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.7433898859792e-05, "epoch": 0.7298245614035088, "percentage": 14.6, "elapsed_time": "0:34:58", "remaining_time": "3:24:36"} +{"current_steps": 5210, "total_steps": 35625, "loss": 0.6433, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.742415548191728e-05, "epoch": 0.7312280701754386, "percentage": 14.62, "elapsed_time": "0:35:02", "remaining_time": "3:24:31"} +{"current_steps": 5220, "total_steps": 35625, "loss": 0.6715, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.741439464606893e-05, "epoch": 0.7326315789473684, "percentage": 14.65, "elapsed_time": "0:35:05", "remaining_time": "3:24:26"} +{"current_steps": 5230, "total_steps": 35625, "loss": 0.6391, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.740461635984609e-05, "epoch": 0.7340350877192983, "percentage": 14.68, "elapsed_time": "0:35:09", "remaining_time": "3:24:20"} +{"current_steps": 5240, "total_steps": 35625, "loss": 0.5834, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.739482063086152e-05, "epoch": 0.7354385964912281, "percentage": 14.71, "elapsed_time": "0:35:13", "remaining_time": "3:24:16"} +{"current_steps": 5250, "total_steps": 35625, "loss": 0.7835, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.73850074667415e-05, "epoch": 0.7368421052631579, "percentage": 14.74, "elapsed_time": "0:35:16", "remaining_time": "3:24:07"} +{"current_steps": 5260, "total_steps": 35625, "loss": 0.6128, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.737517687512593e-05, "epoch": 0.7382456140350877, "percentage": 14.76, "elapsed_time": "0:35:20", "remaining_time": "3:24:03"} +{"current_steps": 5270, "total_steps": 35625, "loss": 0.655, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.7365328863668256e-05, "epoch": 0.7396491228070176, "percentage": 14.79, "elapsed_time": "0:35:25", "remaining_time": "3:24:00"} +{"current_steps": 5280, "total_steps": 35625, "loss": 0.6506, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.735546344003551e-05, "epoch": 0.7410526315789474, "percentage": 14.82, "elapsed_time": "0:35:28", "remaining_time": "3:23:51"} +{"current_steps": 5290, "total_steps": 35625, "loss": 0.6984, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.734558061190824e-05, "epoch": 0.7424561403508771, "percentage": 14.85, "elapsed_time": "0:35:31", "remaining_time": "3:23:45"} +{"current_steps": 5300, "total_steps": 35625, "loss": 0.7401, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.733568038698057e-05, "epoch": 0.743859649122807, "percentage": 14.88, "elapsed_time": "0:35:34", "remaining_time": "3:23:35"} +{"current_steps": 5310, "total_steps": 35625, "loss": 0.6432, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.732576277296017e-05, "epoch": 0.7452631578947368, "percentage": 14.91, "elapsed_time": "0:35:40", "remaining_time": "3:23:38"} +{"current_steps": 5320, "total_steps": 35625, "loss": 0.6687, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.731582777756825e-05, "epoch": 0.7466666666666667, "percentage": 14.93, "elapsed_time": "0:35:43", "remaining_time": "3:23:32"} +{"current_steps": 5330, "total_steps": 35625, "loss": 0.6489, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.730587540853954e-05, "epoch": 0.7480701754385964, "percentage": 14.96, "elapsed_time": "0:35:48", "remaining_time": "3:23:30"} +{"current_steps": 5340, "total_steps": 35625, "loss": 0.6149, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.729590567362228e-05, "epoch": 0.7494736842105263, "percentage": 14.99, "elapsed_time": "0:35:54", "remaining_time": "3:23:36"} +{"current_steps": 5350, "total_steps": 35625, "loss": 0.6227, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.728591858057827e-05, "epoch": 0.7508771929824561, "percentage": 15.02, "elapsed_time": "0:35:57", "remaining_time": "3:23:31"} +{"current_steps": 5360, "total_steps": 35625, "loss": 0.712, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.727591413718282e-05, "epoch": 0.752280701754386, "percentage": 15.05, "elapsed_time": "0:36:01", "remaining_time": "3:23:26"} +{"current_steps": 5370, "total_steps": 35625, "loss": 0.7172, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.7265892351224694e-05, "epoch": 0.7536842105263157, "percentage": 15.07, "elapsed_time": "0:36:07", "remaining_time": "3:23:34"} +{"current_steps": 5380, "total_steps": 35625, "loss": 0.6812, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.725585323050623e-05, "epoch": 0.7550877192982456, "percentage": 15.1, "elapsed_time": "0:36:11", "remaining_time": "3:23:30"} +{"current_steps": 5390, "total_steps": 35625, "loss": 0.6266, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.72457967828432e-05, "epoch": 0.7564912280701754, "percentage": 15.13, "elapsed_time": "0:36:15", "remaining_time": "3:23:25"} +{"current_steps": 5400, "total_steps": 35625, "loss": 0.6976, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.723572301606492e-05, "epoch": 0.7578947368421053, "percentage": 15.16, "elapsed_time": "0:36:19", "remaining_time": "3:23:20"} +{"current_steps": 5410, "total_steps": 35625, "loss": 0.709, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.7225631938014134e-05, "epoch": 0.7592982456140351, "percentage": 15.19, "elapsed_time": "0:36:23", "remaining_time": "3:23:12"} +{"current_steps": 5420, "total_steps": 35625, "loss": 0.5956, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.7215523556547116e-05, "epoch": 0.7607017543859649, "percentage": 15.21, "elapsed_time": "0:36:26", "remaining_time": "3:23:03"} +{"current_steps": 5430, "total_steps": 35625, "loss": 0.6943, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.720539787953357e-05, "epoch": 0.7621052631578947, "percentage": 15.24, "elapsed_time": "0:36:30", "remaining_time": "3:22:59"} +{"current_steps": 5440, "total_steps": 35625, "loss": 0.6322, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.71952549148567e-05, "epoch": 0.7635087719298246, "percentage": 15.27, "elapsed_time": "0:36:35", "remaining_time": "3:23:03"} +{"current_steps": 5450, "total_steps": 35625, "loss": 0.6258, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.7185094670413134e-05, "epoch": 0.7649122807017544, "percentage": 15.3, "elapsed_time": "0:36:39", "remaining_time": "3:22:55"} +{"current_steps": 5460, "total_steps": 35625, "loss": 0.6347, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.7174917154112984e-05, "epoch": 0.7663157894736842, "percentage": 15.33, "elapsed_time": "0:36:42", "remaining_time": "3:22:48"} +{"current_steps": 5470, "total_steps": 35625, "loss": 0.6423, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.716472237387979e-05, "epoch": 0.767719298245614, "percentage": 15.35, "elapsed_time": "0:36:47", "remaining_time": "3:22:47"} +{"current_steps": 5480, "total_steps": 35625, "loss": 0.6614, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.715451033765054e-05, "epoch": 0.7691228070175439, "percentage": 15.38, "elapsed_time": "0:36:51", "remaining_time": "3:22:42"} +{"current_steps": 5490, "total_steps": 35625, "loss": 0.6326, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.714428105337565e-05, "epoch": 0.7705263157894737, "percentage": 15.41, "elapsed_time": "0:36:54", "remaining_time": "3:22:33"} +{"current_steps": 5500, "total_steps": 35625, "loss": 0.6146, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.713403452901898e-05, "epoch": 0.7719298245614035, "percentage": 15.44, "elapsed_time": "0:36:59", "remaining_time": "3:22:37"} +{"current_steps": 5510, "total_steps": 35625, "loss": 0.6061, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.7123770772557774e-05, "epoch": 0.7733333333333333, "percentage": 15.47, "elapsed_time": "0:37:04", "remaining_time": "3:22:37"} +{"current_steps": 5520, "total_steps": 35625, "loss": 0.7423, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.711348979198274e-05, "epoch": 0.7747368421052632, "percentage": 15.49, "elapsed_time": "0:37:07", "remaining_time": "3:22:31"} +{"current_steps": 5530, "total_steps": 35625, "loss": 0.6648, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.710319159529798e-05, "epoch": 0.776140350877193, "percentage": 15.52, "elapsed_time": "0:37:12", "remaining_time": "3:22:27"} +{"current_steps": 5540, "total_steps": 35625, "loss": 0.551, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.709287619052098e-05, "epoch": 0.7775438596491228, "percentage": 15.55, "elapsed_time": "0:37:15", "remaining_time": "3:22:20"} +{"current_steps": 5550, "total_steps": 35625, "loss": 0.7394, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.708254358568264e-05, "epoch": 0.7789473684210526, "percentage": 15.58, "elapsed_time": "0:37:18", "remaining_time": "3:22:12"} +{"current_steps": 5560, "total_steps": 35625, "loss": 0.57, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.7072193788827236e-05, "epoch": 0.7803508771929825, "percentage": 15.61, "elapsed_time": "0:37:23", "remaining_time": "3:22:09"} +{"current_steps": 5570, "total_steps": 35625, "loss": 0.6293, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.706182680801245e-05, "epoch": 0.7817543859649123, "percentage": 15.64, "elapsed_time": "0:37:26", "remaining_time": "3:22:03"} +{"current_steps": 5580, "total_steps": 35625, "loss": 0.6007, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.705144265130934e-05, "epoch": 0.783157894736842, "percentage": 15.66, "elapsed_time": "0:37:31", "remaining_time": "3:22:04"} +{"current_steps": 5590, "total_steps": 35625, "loss": 0.5963, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.704104132680231e-05, "epoch": 0.7845614035087719, "percentage": 15.69, "elapsed_time": "0:37:34", "remaining_time": "3:21:54"} +{"current_steps": 5600, "total_steps": 35625, "loss": 0.7237, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.703062284258916e-05, "epoch": 0.7859649122807018, "percentage": 15.72, "elapsed_time": "0:37:38", "remaining_time": "3:21:50"} +{"current_steps": 5610, "total_steps": 35625, "loss": 0.6452, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.702018720678103e-05, "epoch": 0.7873684210526316, "percentage": 15.75, "elapsed_time": "0:37:42", "remaining_time": "3:21:46"} +{"current_steps": 5620, "total_steps": 35625, "loss": 0.6291, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.7009734427502426e-05, "epoch": 0.7887719298245615, "percentage": 15.78, "elapsed_time": "0:37:47", "remaining_time": "3:21:43"} +{"current_steps": 5630, "total_steps": 35625, "loss": 0.5925, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.699926451289119e-05, "epoch": 0.7901754385964912, "percentage": 15.8, "elapsed_time": "0:37:52", "remaining_time": "3:21:46"} +{"current_steps": 5640, "total_steps": 35625, "loss": 0.7342, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.698877747109852e-05, "epoch": 0.791578947368421, "percentage": 15.83, "elapsed_time": "0:37:56", "remaining_time": "3:21:44"} +{"current_steps": 5650, "total_steps": 35625, "loss": 0.644, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.697827331028893e-05, "epoch": 0.7929824561403509, "percentage": 15.86, "elapsed_time": "0:38:00", "remaining_time": "3:21:36"} +{"current_steps": 5660, "total_steps": 35625, "loss": 0.6567, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.6967752038640264e-05, "epoch": 0.7943859649122808, "percentage": 15.89, "elapsed_time": "0:38:03", "remaining_time": "3:21:28"} +{"current_steps": 5670, "total_steps": 35625, "loss": 0.6873, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.695721366434369e-05, "epoch": 0.7957894736842105, "percentage": 15.92, "elapsed_time": "0:38:06", "remaining_time": "3:21:22"} +{"current_steps": 5680, "total_steps": 35625, "loss": 0.6733, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.694665819560371e-05, "epoch": 0.7971929824561403, "percentage": 15.94, "elapsed_time": "0:38:10", "remaining_time": "3:21:13"} +{"current_steps": 5690, "total_steps": 35625, "loss": 0.642, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.693608564063811e-05, "epoch": 0.7985964912280702, "percentage": 15.97, "elapsed_time": "0:38:13", "remaining_time": "3:21:05"} +{"current_steps": 5700, "total_steps": 35625, "loss": 0.5438, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.692549600767798e-05, "epoch": 0.8, "percentage": 16.0, "elapsed_time": "0:38:19", "remaining_time": "3:21:12"} +{"current_steps": 5710, "total_steps": 35625, "loss": 0.6107, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.6914889304967725e-05, "epoch": 0.8014035087719298, "percentage": 16.03, "elapsed_time": "0:38:23", "remaining_time": "3:21:05"} +{"current_steps": 5720, "total_steps": 35625, "loss": 0.5975, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.690426554076501e-05, "epoch": 0.8028070175438596, "percentage": 16.06, "elapsed_time": "0:38:27", "remaining_time": "3:21:06"} +{"current_steps": 5730, "total_steps": 35625, "loss": 0.6563, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.689362472334082e-05, "epoch": 0.8042105263157895, "percentage": 16.08, "elapsed_time": "0:38:33", "remaining_time": "3:21:10"} +{"current_steps": 5740, "total_steps": 35625, "loss": 0.6199, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.688296686097937e-05, "epoch": 0.8056140350877193, "percentage": 16.11, "elapsed_time": "0:38:36", "remaining_time": "3:21:00"} +{"current_steps": 5750, "total_steps": 35625, "loss": 0.5772, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.6872291961978195e-05, "epoch": 0.8070175438596491, "percentage": 16.14, "elapsed_time": "0:38:40", "remaining_time": "3:20:54"} +{"current_steps": 5760, "total_steps": 35625, "loss": 0.6401, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.6861600034648064e-05, "epoch": 0.8084210526315789, "percentage": 16.17, "elapsed_time": "0:38:43", "remaining_time": "3:20:47"} +{"current_steps": 5770, "total_steps": 35625, "loss": 0.6087, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.6850891087313e-05, "epoch": 0.8098245614035088, "percentage": 16.2, "elapsed_time": "0:38:48", "remaining_time": "3:20:47"} +{"current_steps": 5780, "total_steps": 35625, "loss": 0.6973, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.6840165128310296e-05, "epoch": 0.8112280701754386, "percentage": 16.22, "elapsed_time": "0:38:52", "remaining_time": "3:20:42"} +{"current_steps": 5790, "total_steps": 35625, "loss": 0.6509, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.6829422165990475e-05, "epoch": 0.8126315789473684, "percentage": 16.25, "elapsed_time": "0:38:55", "remaining_time": "3:20:36"} +{"current_steps": 5800, "total_steps": 35625, "loss": 0.6092, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.6818662208717296e-05, "epoch": 0.8140350877192982, "percentage": 16.28, "elapsed_time": "0:39:00", "remaining_time": "3:20:33"} +{"current_steps": 5810, "total_steps": 35625, "loss": 0.5864, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.680788526486776e-05, "epoch": 0.8154385964912281, "percentage": 16.31, "elapsed_time": "0:39:04", "remaining_time": "3:20:31"} +{"current_steps": 5820, "total_steps": 35625, "loss": 0.5736, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.679709134283209e-05, "epoch": 0.8168421052631579, "percentage": 16.34, "elapsed_time": "0:39:08", "remaining_time": "3:20:25"} +{"current_steps": 5830, "total_steps": 35625, "loss": 0.5982, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.678628045101371e-05, "epoch": 0.8182456140350877, "percentage": 16.36, "elapsed_time": "0:39:12", "remaining_time": "3:20:20"} +{"current_steps": 5840, "total_steps": 35625, "loss": 0.6136, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.677545259782929e-05, "epoch": 0.8196491228070175, "percentage": 16.39, "elapsed_time": "0:39:17", "remaining_time": "3:20:23"} +{"current_steps": 5850, "total_steps": 35625, "loss": 0.6519, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.676460779170867e-05, "epoch": 0.8210526315789474, "percentage": 16.42, "elapsed_time": "0:39:21", "remaining_time": "3:20:17"} +{"current_steps": 5860, "total_steps": 35625, "loss": 0.6122, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.675374604109491e-05, "epoch": 0.8224561403508772, "percentage": 16.45, "elapsed_time": "0:39:24", "remaining_time": "3:20:08"} +{"current_steps": 5870, "total_steps": 35625, "loss": 0.5582, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.6742867354444256e-05, "epoch": 0.8238596491228071, "percentage": 16.48, "elapsed_time": "0:39:27", "remaining_time": "3:19:58"} +{"current_steps": 5880, "total_steps": 35625, "loss": 0.6788, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.673197174022613e-05, "epoch": 0.8252631578947368, "percentage": 16.51, "elapsed_time": "0:39:30", "remaining_time": "3:19:52"} +{"current_steps": 5890, "total_steps": 35625, "loss": 0.647, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.672105920692316e-05, "epoch": 0.8266666666666667, "percentage": 16.53, "elapsed_time": "0:39:35", "remaining_time": "3:19:51"} +{"current_steps": 5900, "total_steps": 35625, "loss": 0.6326, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.6710129763031095e-05, "epoch": 0.8280701754385965, "percentage": 16.56, "elapsed_time": "0:39:38", "remaining_time": "3:19:43"} +{"current_steps": 5910, "total_steps": 35625, "loss": 0.6205, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.669918341705891e-05, "epoch": 0.8294736842105264, "percentage": 16.59, "elapsed_time": "0:39:41", "remaining_time": "3:19:35"} +{"current_steps": 5920, "total_steps": 35625, "loss": 0.699, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.66882201775287e-05, "epoch": 0.8308771929824561, "percentage": 16.62, "elapsed_time": "0:39:46", "remaining_time": "3:19:34"} +{"current_steps": 5930, "total_steps": 35625, "loss": 0.6147, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.667724005297573e-05, "epoch": 0.832280701754386, "percentage": 16.65, "elapsed_time": "0:39:50", "remaining_time": "3:19:29"} +{"current_steps": 5940, "total_steps": 35625, "loss": 0.5737, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.66662430519484e-05, "epoch": 0.8336842105263158, "percentage": 16.67, "elapsed_time": "0:39:55", "remaining_time": "3:19:29"} +{"current_steps": 5950, "total_steps": 35625, "loss": 0.6072, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.665522918300823e-05, "epoch": 0.8350877192982457, "percentage": 16.7, "elapsed_time": "0:39:59", "remaining_time": "3:19:27"} +{"current_steps": 5960, "total_steps": 35625, "loss": 0.6296, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.6644198454729933e-05, "epoch": 0.8364912280701754, "percentage": 16.73, "elapsed_time": "0:40:03", "remaining_time": "3:19:23"} +{"current_steps": 5970, "total_steps": 35625, "loss": 0.6489, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.663315087570128e-05, "epoch": 0.8378947368421052, "percentage": 16.76, "elapsed_time": "0:40:06", "remaining_time": "3:19:13"} +{"current_steps": 5980, "total_steps": 35625, "loss": 0.6742, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.662208645452321e-05, "epoch": 0.8392982456140351, "percentage": 16.79, "elapsed_time": "0:40:10", "remaining_time": "3:19:11"} +{"current_steps": 5990, "total_steps": 35625, "loss": 0.573, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.661100519980973e-05, "epoch": 0.840701754385965, "percentage": 16.81, "elapsed_time": "0:40:13", "remaining_time": "3:19:02"} +{"current_steps": 6000, "total_steps": 35625, "loss": 0.6455, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.6599907120188005e-05, "epoch": 0.8421052631578947, "percentage": 16.84, "elapsed_time": "0:40:16", "remaining_time": "3:18:53"} +{"current_steps": 6000, "total_steps": 35625, "loss": null, "eval_loss": 0.6415141820907593, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": null, "epoch": 0.8421052631578947, "percentage": 16.84, "elapsed_time": "0:40:16", "remaining_time": "3:18:53"} +{"current_steps": 6010, "total_steps": 35625, "loss": 0.5362, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.658879222429825e-05, "epoch": 0.8435087719298245, "percentage": 16.87, "elapsed_time": "0:41:05", "remaining_time": "3:22:30"} +{"current_steps": 6020, "total_steps": 35625, "loss": 0.6321, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.65776605207938e-05, "epoch": 0.8449122807017544, "percentage": 16.9, "elapsed_time": "0:41:09", "remaining_time": "3:22:25"} +{"current_steps": 6030, "total_steps": 35625, "loss": 0.6208, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.656651201834106e-05, "epoch": 0.8463157894736842, "percentage": 16.93, "elapsed_time": "0:41:13", "remaining_time": "3:22:19"} +{"current_steps": 6040, "total_steps": 35625, "loss": 0.6529, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.655534672561953e-05, "epoch": 0.847719298245614, "percentage": 16.95, "elapsed_time": "0:41:16", "remaining_time": "3:22:09"} +{"current_steps": 6050, "total_steps": 35625, "loss": 0.6515, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.654416465132177e-05, "epoch": 0.8491228070175438, "percentage": 16.98, "elapsed_time": "0:41:20", "remaining_time": "3:22:03"} +{"current_steps": 6060, "total_steps": 35625, "loss": 0.613, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.6532965804153416e-05, "epoch": 0.8505263157894737, "percentage": 17.01, "elapsed_time": "0:41:23", "remaining_time": "3:21:57"} +{"current_steps": 6070, "total_steps": 35625, "loss": 0.6215, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.652175019283314e-05, "epoch": 0.8519298245614035, "percentage": 17.04, "elapsed_time": "0:41:27", "remaining_time": "3:21:50"} +{"current_steps": 6080, "total_steps": 35625, "loss": 0.7427, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.6510517826092695e-05, "epoch": 0.8533333333333334, "percentage": 17.07, "elapsed_time": "0:41:30", "remaining_time": "3:21:42"} +{"current_steps": 6090, "total_steps": 35625, "loss": 0.58, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.649926871267685e-05, "epoch": 0.8547368421052631, "percentage": 17.09, "elapsed_time": "0:41:34", "remaining_time": "3:21:38"} +{"current_steps": 6100, "total_steps": 35625, "loss": 0.6916, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.6488002861343425e-05, "epoch": 0.856140350877193, "percentage": 17.12, "elapsed_time": "0:41:38", "remaining_time": "3:21:33"} +{"current_steps": 6110, "total_steps": 35625, "loss": 0.6073, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.647672028086328e-05, "epoch": 0.8575438596491228, "percentage": 17.15, "elapsed_time": "0:41:42", "remaining_time": "3:21:28"} +{"current_steps": 6120, "total_steps": 35625, "loss": 0.6273, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.646542098002029e-05, "epoch": 0.8589473684210527, "percentage": 17.18, "elapsed_time": "0:41:45", "remaining_time": "3:21:20"} +{"current_steps": 6130, "total_steps": 35625, "loss": 0.6657, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.645410496761135e-05, "epoch": 0.8603508771929824, "percentage": 17.21, "elapsed_time": "0:41:49", "remaining_time": "3:21:13"} +{"current_steps": 6140, "total_steps": 35625, "loss": 0.6861, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.644277225244635e-05, "epoch": 0.8617543859649123, "percentage": 17.24, "elapsed_time": "0:41:52", "remaining_time": "3:21:04"} +{"current_steps": 6150, "total_steps": 35625, "loss": 0.6834, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.6431422843348216e-05, "epoch": 0.8631578947368421, "percentage": 17.26, "elapsed_time": "0:41:55", "remaining_time": "3:20:57"} +{"current_steps": 6160, "total_steps": 35625, "loss": 0.6098, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.642005674915284e-05, "epoch": 0.864561403508772, "percentage": 17.29, "elapsed_time": "0:42:00", "remaining_time": "3:20:54"} +{"current_steps": 6170, "total_steps": 35625, "loss": 0.6831, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.640867397870912e-05, "epoch": 0.8659649122807017, "percentage": 17.32, "elapsed_time": "0:42:03", "remaining_time": "3:20:49"} +{"current_steps": 6180, "total_steps": 35625, "loss": 0.5846, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.639727454087892e-05, "epoch": 0.8673684210526316, "percentage": 17.35, "elapsed_time": "0:42:06", "remaining_time": "3:20:39"} +{"current_steps": 6190, "total_steps": 35625, "loss": 0.6436, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.638585844453711e-05, "epoch": 0.8687719298245614, "percentage": 17.38, "elapsed_time": "0:42:11", "remaining_time": "3:20:40"} +{"current_steps": 6200, "total_steps": 35625, "loss": 0.7538, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.6374425698571514e-05, "epoch": 0.8701754385964913, "percentage": 17.4, "elapsed_time": "0:42:15", "remaining_time": "3:20:32"} +{"current_steps": 6210, "total_steps": 35625, "loss": 0.596, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.63629763118829e-05, "epoch": 0.871578947368421, "percentage": 17.43, "elapsed_time": "0:42:19", "remaining_time": "3:20:29"} +{"current_steps": 6220, "total_steps": 35625, "loss": 0.5844, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.6351510293385026e-05, "epoch": 0.8729824561403509, "percentage": 17.46, "elapsed_time": "0:42:23", "remaining_time": "3:20:25"} +{"current_steps": 6230, "total_steps": 35625, "loss": 0.5785, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.634002765200456e-05, "epoch": 0.8743859649122807, "percentage": 17.49, "elapsed_time": "0:42:26", "remaining_time": "3:20:17"} +{"current_steps": 6240, "total_steps": 35625, "loss": 0.5728, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.632852839668115e-05, "epoch": 0.8757894736842106, "percentage": 17.52, "elapsed_time": "0:42:30", "remaining_time": "3:20:09"} +{"current_steps": 6250, "total_steps": 35625, "loss": 0.6317, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.6317012536367354e-05, "epoch": 0.8771929824561403, "percentage": 17.54, "elapsed_time": "0:42:33", "remaining_time": "3:20:02"} +{"current_steps": 6260, "total_steps": 35625, "loss": 0.6152, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.630548008002866e-05, "epoch": 0.8785964912280702, "percentage": 17.57, "elapsed_time": "0:42:38", "remaining_time": "3:20:02"} +{"current_steps": 6270, "total_steps": 35625, "loss": 0.64, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.629393103664349e-05, "epoch": 0.88, "percentage": 17.6, "elapsed_time": "0:42:41", "remaining_time": "3:19:54"} +{"current_steps": 6280, "total_steps": 35625, "loss": 0.5923, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.6282365415203164e-05, "epoch": 0.8814035087719299, "percentage": 17.63, "elapsed_time": "0:42:45", "remaining_time": "3:19:49"} +{"current_steps": 6290, "total_steps": 35625, "loss": 0.6745, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.627078322471191e-05, "epoch": 0.8828070175438596, "percentage": 17.66, "elapsed_time": "0:42:49", "remaining_time": "3:19:42"} +{"current_steps": 6300, "total_steps": 35625, "loss": 0.5819, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.625918447418687e-05, "epoch": 0.8842105263157894, "percentage": 17.68, "elapsed_time": "0:42:53", "remaining_time": "3:19:39"} +{"current_steps": 6310, "total_steps": 35625, "loss": 0.5965, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.624756917265807e-05, "epoch": 0.8856140350877193, "percentage": 17.71, "elapsed_time": "0:42:59", "remaining_time": "3:19:41"} +{"current_steps": 6320, "total_steps": 35625, "loss": 0.5838, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.62359373291684e-05, "epoch": 0.8870175438596491, "percentage": 17.74, "elapsed_time": "0:43:03", "remaining_time": "3:19:40"} +{"current_steps": 6330, "total_steps": 35625, "loss": 0.7304, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.622428895277367e-05, "epoch": 0.888421052631579, "percentage": 17.77, "elapsed_time": "0:43:08", "remaining_time": "3:19:38"} +{"current_steps": 6340, "total_steps": 35625, "loss": 0.5938, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.621262405254253e-05, "epoch": 0.8898245614035087, "percentage": 17.8, "elapsed_time": "0:43:11", "remaining_time": "3:19:29"} +{"current_steps": 6350, "total_steps": 35625, "loss": 0.6276, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.620094263755652e-05, "epoch": 0.8912280701754386, "percentage": 17.82, "elapsed_time": "0:43:15", "remaining_time": "3:19:25"} +{"current_steps": 6360, "total_steps": 35625, "loss": 0.613, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.618924471691e-05, "epoch": 0.8926315789473684, "percentage": 17.85, "elapsed_time": "0:43:19", "remaining_time": "3:19:22"} +{"current_steps": 6370, "total_steps": 35625, "loss": 0.599, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.617753029971021e-05, "epoch": 0.8940350877192983, "percentage": 17.88, "elapsed_time": "0:43:23", "remaining_time": "3:19:17"} +{"current_steps": 6380, "total_steps": 35625, "loss": 0.6358, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.6165799395077236e-05, "epoch": 0.895438596491228, "percentage": 17.91, "elapsed_time": "0:43:27", "remaining_time": "3:19:14"} +{"current_steps": 6390, "total_steps": 35625, "loss": 0.6747, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.615405201214398e-05, "epoch": 0.8968421052631579, "percentage": 17.94, "elapsed_time": "0:43:31", "remaining_time": "3:19:06"} +{"current_steps": 6400, "total_steps": 35625, "loss": 0.6082, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.614228816005618e-05, "epoch": 0.8982456140350877, "percentage": 17.96, "elapsed_time": "0:43:34", "remaining_time": "3:18:59"} +{"current_steps": 6410, "total_steps": 35625, "loss": 0.5506, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.61305078479724e-05, "epoch": 0.8996491228070176, "percentage": 17.99, "elapsed_time": "0:43:39", "remaining_time": "3:18:57"} +{"current_steps": 6420, "total_steps": 35625, "loss": 0.5816, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.611871108506403e-05, "epoch": 0.9010526315789473, "percentage": 18.02, "elapsed_time": "0:43:43", "remaining_time": "3:18:54"} +{"current_steps": 6430, "total_steps": 35625, "loss": 0.6178, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.610689788051523e-05, "epoch": 0.9024561403508772, "percentage": 18.05, "elapsed_time": "0:43:47", "remaining_time": "3:18:49"} +{"current_steps": 6440, "total_steps": 35625, "loss": 0.644, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.6095068243523e-05, "epoch": 0.903859649122807, "percentage": 18.08, "elapsed_time": "0:43:50", "remaining_time": "3:18:41"} +{"current_steps": 6450, "total_steps": 35625, "loss": 0.5564, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.608322218329711e-05, "epoch": 0.9052631578947369, "percentage": 18.11, "elapsed_time": "0:43:55", "remaining_time": "3:18:41"} +{"current_steps": 6460, "total_steps": 35625, "loss": 0.6534, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.607135970906014e-05, "epoch": 0.9066666666666666, "percentage": 18.13, "elapsed_time": "0:43:59", "remaining_time": "3:18:36"} +{"current_steps": 6470, "total_steps": 35625, "loss": 0.6671, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.605948083004741e-05, "epoch": 0.9080701754385965, "percentage": 18.16, "elapsed_time": "0:44:03", "remaining_time": "3:18:30"} +{"current_steps": 6480, "total_steps": 35625, "loss": 0.6996, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.6047585555507045e-05, "epoch": 0.9094736842105263, "percentage": 18.19, "elapsed_time": "0:44:07", "remaining_time": "3:18:27"} +{"current_steps": 6490, "total_steps": 35625, "loss": 0.5937, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.603567389469993e-05, "epoch": 0.9108771929824562, "percentage": 18.22, "elapsed_time": "0:44:10", "remaining_time": "3:18:20"} +{"current_steps": 6500, "total_steps": 35625, "loss": 0.555, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.60237458568997e-05, "epoch": 0.9122807017543859, "percentage": 18.25, "elapsed_time": "0:44:14", "remaining_time": "3:18:12"} +{"current_steps": 6510, "total_steps": 35625, "loss": 0.6721, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.6011801451392736e-05, "epoch": 0.9136842105263158, "percentage": 18.27, "elapsed_time": "0:44:17", "remaining_time": "3:18:05"} +{"current_steps": 6520, "total_steps": 35625, "loss": 0.5849, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.5999840687478167e-05, "epoch": 0.9150877192982456, "percentage": 18.3, "elapsed_time": "0:44:21", "remaining_time": "3:18:01"} +{"current_steps": 6530, "total_steps": 35625, "loss": 0.6013, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.598786357446786e-05, "epoch": 0.9164912280701755, "percentage": 18.33, "elapsed_time": "0:44:24", "remaining_time": "3:17:52"} +{"current_steps": 6540, "total_steps": 35625, "loss": 0.6178, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.5975870121686406e-05, "epoch": 0.9178947368421052, "percentage": 18.36, "elapsed_time": "0:44:28", "remaining_time": "3:17:47"} +{"current_steps": 6550, "total_steps": 35625, "loss": 0.5985, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.596386033847111e-05, "epoch": 0.9192982456140351, "percentage": 18.39, "elapsed_time": "0:44:33", "remaining_time": "3:17:48"} +{"current_steps": 6560, "total_steps": 35625, "loss": 0.5878, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.5951834234172025e-05, "epoch": 0.9207017543859649, "percentage": 18.41, "elapsed_time": "0:44:38", "remaining_time": "3:17:47"} +{"current_steps": 6570, "total_steps": 35625, "loss": 0.7004, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.593979181815187e-05, "epoch": 0.9221052631578948, "percentage": 18.44, "elapsed_time": "0:44:42", "remaining_time": "3:17:43"} +{"current_steps": 6580, "total_steps": 35625, "loss": 0.615, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.5927733099786066e-05, "epoch": 0.9235087719298246, "percentage": 18.47, "elapsed_time": "0:44:45", "remaining_time": "3:17:34"} +{"current_steps": 6590, "total_steps": 35625, "loss": 0.5789, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.591565808846276e-05, "epoch": 0.9249122807017544, "percentage": 18.5, "elapsed_time": "0:44:49", "remaining_time": "3:17:30"} +{"current_steps": 6600, "total_steps": 35625, "loss": 0.5827, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.5903566793582755e-05, "epoch": 0.9263157894736842, "percentage": 18.53, "elapsed_time": "0:44:52", "remaining_time": "3:17:22"} +{"current_steps": 6610, "total_steps": 35625, "loss": 0.6176, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.589145922455954e-05, "epoch": 0.927719298245614, "percentage": 18.55, "elapsed_time": "0:44:56", "remaining_time": "3:17:16"} +{"current_steps": 6620, "total_steps": 35625, "loss": 0.7056, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.587933539081927e-05, "epoch": 0.9291228070175439, "percentage": 18.58, "elapsed_time": "0:44:59", "remaining_time": "3:17:07"} +{"current_steps": 6630, "total_steps": 35625, "loss": 0.6418, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.586719530180075e-05, "epoch": 0.9305263157894736, "percentage": 18.61, "elapsed_time": "0:45:02", "remaining_time": "3:16:59"} +{"current_steps": 6640, "total_steps": 35625, "loss": 0.5231, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.585503896695549e-05, "epoch": 0.9319298245614035, "percentage": 18.64, "elapsed_time": "0:45:05", "remaining_time": "3:16:49"} +{"current_steps": 6650, "total_steps": 35625, "loss": 0.7463, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.584286639574758e-05, "epoch": 0.9333333333333333, "percentage": 18.67, "elapsed_time": "0:45:08", "remaining_time": "3:16:41"} +{"current_steps": 6660, "total_steps": 35625, "loss": 0.6373, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.58306775976538e-05, "epoch": 0.9347368421052632, "percentage": 18.69, "elapsed_time": "0:45:12", "remaining_time": "3:16:34"} +{"current_steps": 6670, "total_steps": 35625, "loss": 0.6609, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.581847258216355e-05, "epoch": 0.9361403508771929, "percentage": 18.72, "elapsed_time": "0:45:15", "remaining_time": "3:16:26"} +{"current_steps": 6680, "total_steps": 35625, "loss": 0.6366, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.580625135877884e-05, "epoch": 0.9375438596491228, "percentage": 18.75, "elapsed_time": "0:45:18", "remaining_time": "3:16:17"} +{"current_steps": 6690, "total_steps": 35625, "loss": 0.6359, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.5794013937014326e-05, "epoch": 0.9389473684210526, "percentage": 18.78, "elapsed_time": "0:45:23", "remaining_time": "3:16:20"} +{"current_steps": 6700, "total_steps": 35625, "loss": 0.7239, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.578176032639724e-05, "epoch": 0.9403508771929825, "percentage": 18.81, "elapsed_time": "0:45:27", "remaining_time": "3:16:15"} +{"current_steps": 6710, "total_steps": 35625, "loss": 0.5848, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.5769490536467465e-05, "epoch": 0.9417543859649122, "percentage": 18.84, "elapsed_time": "0:45:32", "remaining_time": "3:16:13"} +{"current_steps": 6720, "total_steps": 35625, "loss": 0.6155, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.5757204576777437e-05, "epoch": 0.9431578947368421, "percentage": 18.86, "elapsed_time": "0:45:36", "remaining_time": "3:16:11"} +{"current_steps": 6730, "total_steps": 35625, "loss": 0.5817, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.574490245689219e-05, "epoch": 0.9445614035087719, "percentage": 18.89, "elapsed_time": "0:45:39", "remaining_time": "3:16:02"} +{"current_steps": 6740, "total_steps": 35625, "loss": 0.4903, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.573258418638936e-05, "epoch": 0.9459649122807018, "percentage": 18.92, "elapsed_time": "0:45:43", "remaining_time": "3:15:56"} +{"current_steps": 6750, "total_steps": 35625, "loss": 0.6289, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.572024977485914e-05, "epoch": 0.9473684210526315, "percentage": 18.95, "elapsed_time": "0:45:47", "remaining_time": "3:15:51"} +{"current_steps": 6760, "total_steps": 35625, "loss": 0.6133, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.5707899231904286e-05, "epoch": 0.9487719298245614, "percentage": 18.98, "elapsed_time": "0:45:51", "remaining_time": "3:15:50"} +{"current_steps": 6770, "total_steps": 35625, "loss": 0.638, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.569553256714012e-05, "epoch": 0.9501754385964912, "percentage": 19.0, "elapsed_time": "0:45:56", "remaining_time": "3:15:48"} +{"current_steps": 6780, "total_steps": 35625, "loss": 0.7599, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.5683149790194526e-05, "epoch": 0.9515789473684211, "percentage": 19.03, "elapsed_time": "0:46:00", "remaining_time": "3:15:43"} +{"current_steps": 6790, "total_steps": 35625, "loss": 0.6906, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.5670750910707903e-05, "epoch": 0.9529824561403509, "percentage": 19.06, "elapsed_time": "0:46:03", "remaining_time": "3:15:35"} +{"current_steps": 6800, "total_steps": 35625, "loss": 0.7387, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.565833593833321e-05, "epoch": 0.9543859649122807, "percentage": 19.09, "elapsed_time": "0:46:07", "remaining_time": "3:15:30"} +{"current_steps": 6810, "total_steps": 35625, "loss": 0.5566, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.5645904882735935e-05, "epoch": 0.9557894736842105, "percentage": 19.12, "elapsed_time": "0:46:10", "remaining_time": "3:15:23"} +{"current_steps": 6820, "total_steps": 35625, "loss": 0.5748, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.563345775359408e-05, "epoch": 0.9571929824561404, "percentage": 19.14, "elapsed_time": "0:46:13", "remaining_time": "3:15:15"} +{"current_steps": 6830, "total_steps": 35625, "loss": 0.6256, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.562099456059815e-05, "epoch": 0.9585964912280702, "percentage": 19.17, "elapsed_time": "0:46:17", "remaining_time": "3:15:09"} +{"current_steps": 6840, "total_steps": 35625, "loss": 0.5826, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.5608515313451186e-05, "epoch": 0.96, "percentage": 19.2, "elapsed_time": "0:46:21", "remaining_time": "3:15:05"} +{"current_steps": 6850, "total_steps": 35625, "loss": 0.5538, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.559602002186869e-05, "epoch": 0.9614035087719298, "percentage": 19.23, "elapsed_time": "0:46:24", "remaining_time": "3:14:58"} +{"current_steps": 6860, "total_steps": 35625, "loss": 0.6514, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.558350869557868e-05, "epoch": 0.9628070175438597, "percentage": 19.26, "elapsed_time": "0:46:27", "remaining_time": "3:14:49"} +{"current_steps": 6870, "total_steps": 35625, "loss": 0.7813, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.557098134432167e-05, "epoch": 0.9642105263157895, "percentage": 19.28, "elapsed_time": "0:46:31", "remaining_time": "3:14:45"} +{"current_steps": 6880, "total_steps": 35625, "loss": 0.5993, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.555843797785061e-05, "epoch": 0.9656140350877193, "percentage": 19.31, "elapsed_time": "0:46:34", "remaining_time": "3:14:36"} +{"current_steps": 6890, "total_steps": 35625, "loss": 0.5594, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.554587860593095e-05, "epoch": 0.9670175438596491, "percentage": 19.34, "elapsed_time": "0:46:37", "remaining_time": "3:14:27"} +{"current_steps": 6900, "total_steps": 35625, "loss": 0.5581, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.553330323834059e-05, "epoch": 0.968421052631579, "percentage": 19.37, "elapsed_time": "0:46:41", "remaining_time": "3:14:23"} +{"current_steps": 6910, "total_steps": 35625, "loss": 0.6295, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.552071188486989e-05, "epoch": 0.9698245614035088, "percentage": 19.4, "elapsed_time": "0:46:45", "remaining_time": "3:14:20"} +{"current_steps": 6920, "total_steps": 35625, "loss": 0.5697, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.550810455532164e-05, "epoch": 0.9712280701754386, "percentage": 19.42, "elapsed_time": "0:46:49", "remaining_time": "3:14:14"} +{"current_steps": 6930, "total_steps": 35625, "loss": 0.5933, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.5495481259511095e-05, "epoch": 0.9726315789473684, "percentage": 19.45, "elapsed_time": "0:46:52", "remaining_time": "3:14:05"} +{"current_steps": 6940, "total_steps": 35625, "loss": 0.5723, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.54828420072659e-05, "epoch": 0.9740350877192983, "percentage": 19.48, "elapsed_time": "0:46:55", "remaining_time": "3:13:57"} +{"current_steps": 6950, "total_steps": 35625, "loss": 0.5749, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.547018680842616e-05, "epoch": 0.9754385964912281, "percentage": 19.51, "elapsed_time": "0:46:59", "remaining_time": "3:13:50"} +{"current_steps": 6960, "total_steps": 35625, "loss": 0.7053, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.545751567284439e-05, "epoch": 0.9768421052631578, "percentage": 19.54, "elapsed_time": "0:47:02", "remaining_time": "3:13:46"} +{"current_steps": 6970, "total_steps": 35625, "loss": 0.629, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.5444828610385486e-05, "epoch": 0.9782456140350877, "percentage": 19.56, "elapsed_time": "0:47:06", "remaining_time": "3:13:40"} +{"current_steps": 6980, "total_steps": 35625, "loss": 0.6647, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.543212563092677e-05, "epoch": 0.9796491228070175, "percentage": 19.59, "elapsed_time": "0:47:09", "remaining_time": "3:13:33"} +{"current_steps": 6990, "total_steps": 35625, "loss": 0.6921, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.541940674435794e-05, "epoch": 0.9810526315789474, "percentage": 19.62, "elapsed_time": "0:47:14", "remaining_time": "3:13:33"} +{"current_steps": 7000, "total_steps": 35625, "loss": 0.6187, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.5406671960581096e-05, "epoch": 0.9824561403508771, "percentage": 19.65, "elapsed_time": "0:47:18", "remaining_time": "3:13:26"} +{"current_steps": 7010, "total_steps": 35625, "loss": 0.6471, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.53939212895107e-05, "epoch": 0.983859649122807, "percentage": 19.68, "elapsed_time": "0:47:23", "remaining_time": "3:13:25"} +{"current_steps": 7020, "total_steps": 35625, "loss": 0.5916, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.538115474107357e-05, "epoch": 0.9852631578947368, "percentage": 19.71, "elapsed_time": "0:47:27", "remaining_time": "3:13:21"} +{"current_steps": 7030, "total_steps": 35625, "loss": 0.6859, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.536837232520893e-05, "epoch": 0.9866666666666667, "percentage": 19.73, "elapsed_time": "0:47:30", "remaining_time": "3:13:13"} +{"current_steps": 7040, "total_steps": 35625, "loss": 0.64, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.535557405186831e-05, "epoch": 0.9880701754385965, "percentage": 19.76, "elapsed_time": "0:47:33", "remaining_time": "3:13:05"} +{"current_steps": 7050, "total_steps": 35625, "loss": 0.5839, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.534275993101561e-05, "epoch": 0.9894736842105263, "percentage": 19.79, "elapsed_time": "0:47:36", "remaining_time": "3:12:59"} +{"current_steps": 7060, "total_steps": 35625, "loss": 0.577, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.532992997262706e-05, "epoch": 0.9908771929824561, "percentage": 19.82, "elapsed_time": "0:47:41", "remaining_time": "3:12:59"} +{"current_steps": 7070, "total_steps": 35625, "loss": 0.7155, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.531708418669122e-05, "epoch": 0.992280701754386, "percentage": 19.85, "elapsed_time": "0:47:45", "remaining_time": "3:12:55"} +{"current_steps": 7080, "total_steps": 35625, "loss": 0.6713, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.5304222583208983e-05, "epoch": 0.9936842105263158, "percentage": 19.87, "elapsed_time": "0:47:50", "remaining_time": "3:12:51"} +{"current_steps": 7090, "total_steps": 35625, "loss": 0.6528, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.5291345172193546e-05, "epoch": 0.9950877192982456, "percentage": 19.9, "elapsed_time": "0:47:55", "remaining_time": "3:12:51"} +{"current_steps": 7100, "total_steps": 35625, "loss": 0.5705, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.5278451963670403e-05, "epoch": 0.9964912280701754, "percentage": 19.93, "elapsed_time": "0:47:58", "remaining_time": "3:12:45"} +{"current_steps": 7110, "total_steps": 35625, "loss": 0.6763, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.526554296767738e-05, "epoch": 0.9978947368421053, "percentage": 19.96, "elapsed_time": "0:48:02", "remaining_time": "3:12:40"} +{"current_steps": 7120, "total_steps": 35625, "loss": 0.5855, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.525261819426455e-05, "epoch": 0.9992982456140351, "percentage": 19.99, "elapsed_time": "0:48:05", "remaining_time": "3:12:33"} +{"current_steps": 7130, "total_steps": 35625, "loss": 0.5631, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.5239677653494305e-05, "epoch": 1.0007017543859649, "percentage": 20.01, "elapsed_time": "0:48:10", "remaining_time": "3:12:31"} +{"current_steps": 7140, "total_steps": 35625, "loss": 0.493, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.5226721355441306e-05, "epoch": 1.0021052631578948, "percentage": 20.04, "elapsed_time": "0:48:13", "remaining_time": "3:12:25"} +{"current_steps": 7150, "total_steps": 35625, "loss": 0.5049, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.5213749310192455e-05, "epoch": 1.0035087719298246, "percentage": 20.07, "elapsed_time": "0:48:16", "remaining_time": "3:12:17"} +{"current_steps": 7160, "total_steps": 35625, "loss": 0.5204, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.520076152784695e-05, "epoch": 1.0049122807017543, "percentage": 20.1, "elapsed_time": "0:48:20", "remaining_time": "3:12:09"} +{"current_steps": 7170, "total_steps": 35625, "loss": 0.5395, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.518775801851622e-05, "epoch": 1.0063157894736843, "percentage": 20.13, "elapsed_time": "0:48:23", "remaining_time": "3:12:02"} +{"current_steps": 7180, "total_steps": 35625, "loss": 0.5231, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.517473879232395e-05, "epoch": 1.007719298245614, "percentage": 20.15, "elapsed_time": "0:48:28", "remaining_time": "3:12:02"} +{"current_steps": 7190, "total_steps": 35625, "loss": 0.5764, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.516170385940603e-05, "epoch": 1.0091228070175438, "percentage": 20.18, "elapsed_time": "0:48:32", "remaining_time": "3:11:59"} +{"current_steps": 7200, "total_steps": 35625, "loss": 0.5339, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.514865322991063e-05, "epoch": 1.0105263157894737, "percentage": 20.21, "elapsed_time": "0:48:36", "remaining_time": "3:11:52"} +{"current_steps": 7210, "total_steps": 35625, "loss": 0.5684, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.51355869139981e-05, "epoch": 1.0119298245614035, "percentage": 20.24, "elapsed_time": "0:48:39", "remaining_time": "3:11:44"} +{"current_steps": 7220, "total_steps": 35625, "loss": 0.539, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.512250492184101e-05, "epoch": 1.0133333333333334, "percentage": 20.27, "elapsed_time": "0:48:42", "remaining_time": "3:11:38"} +{"current_steps": 7230, "total_steps": 35625, "loss": 0.5348, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.510940726362416e-05, "epoch": 1.0147368421052632, "percentage": 20.29, "elapsed_time": "0:48:46", "remaining_time": "3:11:33"} +{"current_steps": 7240, "total_steps": 35625, "loss": 0.5795, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.50962939495445e-05, "epoch": 1.016140350877193, "percentage": 20.32, "elapsed_time": "0:48:49", "remaining_time": "3:11:26"} +{"current_steps": 7250, "total_steps": 35625, "loss": 0.5894, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.50831649898112e-05, "epoch": 1.0175438596491229, "percentage": 20.35, "elapsed_time": "0:48:52", "remaining_time": "3:11:18"} +{"current_steps": 7260, "total_steps": 35625, "loss": 0.5825, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.507002039464562e-05, "epoch": 1.0189473684210526, "percentage": 20.38, "elapsed_time": "0:48:55", "remaining_time": "3:11:10"} +{"current_steps": 7270, "total_steps": 35625, "loss": 0.5513, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.505686017428127e-05, "epoch": 1.0203508771929826, "percentage": 20.41, "elapsed_time": "0:48:59", "remaining_time": "3:11:03"} +{"current_steps": 7280, "total_steps": 35625, "loss": 0.6675, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.504368433896382e-05, "epoch": 1.0217543859649123, "percentage": 20.44, "elapsed_time": "0:49:03", "remaining_time": "3:11:00"} +{"current_steps": 7290, "total_steps": 35625, "loss": 0.5146, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.5030492898951134e-05, "epoch": 1.023157894736842, "percentage": 20.46, "elapsed_time": "0:49:06", "remaining_time": "3:10:51"} +{"current_steps": 7300, "total_steps": 35625, "loss": 0.6254, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.501728586451318e-05, "epoch": 1.024561403508772, "percentage": 20.49, "elapsed_time": "0:49:09", "remaining_time": "3:10:45"} +{"current_steps": 7310, "total_steps": 35625, "loss": 0.4688, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.5004063245932097e-05, "epoch": 1.0259649122807017, "percentage": 20.52, "elapsed_time": "0:49:13", "remaining_time": "3:10:40"} +{"current_steps": 7320, "total_steps": 35625, "loss": 0.5227, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.4990825053502136e-05, "epoch": 1.0273684210526315, "percentage": 20.55, "elapsed_time": "0:49:18", "remaining_time": "3:10:38"} +{"current_steps": 7330, "total_steps": 35625, "loss": 0.5219, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.497757129752969e-05, "epoch": 1.0287719298245614, "percentage": 20.58, "elapsed_time": "0:49:22", "remaining_time": "3:10:36"} +{"current_steps": 7340, "total_steps": 35625, "loss": 0.5006, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.496430198833327e-05, "epoch": 1.0301754385964912, "percentage": 20.6, "elapsed_time": "0:49:27", "remaining_time": "3:10:33"} +{"current_steps": 7350, "total_steps": 35625, "loss": 0.519, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.495101713624348e-05, "epoch": 1.0315789473684212, "percentage": 20.63, "elapsed_time": "0:49:30", "remaining_time": "3:10:26"} +{"current_steps": 7360, "total_steps": 35625, "loss": 0.6042, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.493771675160303e-05, "epoch": 1.032982456140351, "percentage": 20.66, "elapsed_time": "0:49:34", "remaining_time": "3:10:22"} +{"current_steps": 7370, "total_steps": 35625, "loss": 0.5092, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.4924400844766734e-05, "epoch": 1.0343859649122806, "percentage": 20.69, "elapsed_time": "0:49:37", "remaining_time": "3:10:14"} +{"current_steps": 7380, "total_steps": 35625, "loss": 0.6178, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.491106942610147e-05, "epoch": 1.0357894736842106, "percentage": 20.72, "elapsed_time": "0:49:43", "remaining_time": "3:10:18"} +{"current_steps": 7390, "total_steps": 35625, "loss": 0.6226, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.489772250598622e-05, "epoch": 1.0371929824561403, "percentage": 20.74, "elapsed_time": "0:49:47", "remaining_time": "3:10:13"} +{"current_steps": 7400, "total_steps": 35625, "loss": 0.5821, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.488436009481201e-05, "epoch": 1.03859649122807, "percentage": 20.77, "elapsed_time": "0:49:50", "remaining_time": "3:10:06"} +{"current_steps": 7410, "total_steps": 35625, "loss": 0.5265, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.487098220298193e-05, "epoch": 1.04, "percentage": 20.8, "elapsed_time": "0:49:53", "remaining_time": "3:09:59"} +{"current_steps": 7420, "total_steps": 35625, "loss": 0.5617, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.485758884091113e-05, "epoch": 1.0414035087719298, "percentage": 20.83, "elapsed_time": "0:49:58", "remaining_time": "3:09:58"} +{"current_steps": 7430, "total_steps": 35625, "loss": 0.5468, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.4844180019026805e-05, "epoch": 1.0428070175438597, "percentage": 20.86, "elapsed_time": "0:50:02", "remaining_time": "3:09:52"} +{"current_steps": 7440, "total_steps": 35625, "loss": 0.5048, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.483075574776819e-05, "epoch": 1.0442105263157895, "percentage": 20.88, "elapsed_time": "0:50:05", "remaining_time": "3:09:44"} +{"current_steps": 7450, "total_steps": 35625, "loss": 0.5684, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.4817316037586524e-05, "epoch": 1.0456140350877192, "percentage": 20.91, "elapsed_time": "0:50:08", "remaining_time": "3:09:36"} +{"current_steps": 7460, "total_steps": 35625, "loss": 0.5851, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.480386089894509e-05, "epoch": 1.0470175438596492, "percentage": 20.94, "elapsed_time": "0:50:11", "remaining_time": "3:09:28"} +{"current_steps": 7470, "total_steps": 35625, "loss": 0.5308, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.479039034231918e-05, "epoch": 1.048421052631579, "percentage": 20.97, "elapsed_time": "0:50:16", "remaining_time": "3:09:28"} +{"current_steps": 7480, "total_steps": 35625, "loss": 0.5904, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.477690437819607e-05, "epoch": 1.0498245614035087, "percentage": 21.0, "elapsed_time": "0:50:19", "remaining_time": "3:09:22"} +{"current_steps": 7490, "total_steps": 35625, "loss": 0.4894, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.476340301707507e-05, "epoch": 1.0512280701754386, "percentage": 21.02, "elapsed_time": "0:50:24", "remaining_time": "3:09:20"} +{"current_steps": 7500, "total_steps": 35625, "loss": 0.4906, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.4749886269467416e-05, "epoch": 1.0526315789473684, "percentage": 21.05, "elapsed_time": "0:50:29", "remaining_time": "3:09:21"} +{"current_steps": 7510, "total_steps": 35625, "loss": 0.5399, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.473635414589639e-05, "epoch": 1.0540350877192983, "percentage": 21.08, "elapsed_time": "0:50:32", "remaining_time": "3:09:14"} +{"current_steps": 7520, "total_steps": 35625, "loss": 0.5168, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.47228066568972e-05, "epoch": 1.055438596491228, "percentage": 21.11, "elapsed_time": "0:50:36", "remaining_time": "3:09:07"} +{"current_steps": 7530, "total_steps": 35625, "loss": 0.4888, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.470924381301704e-05, "epoch": 1.0568421052631578, "percentage": 21.14, "elapsed_time": "0:50:40", "remaining_time": "3:09:04"} +{"current_steps": 7540, "total_steps": 35625, "loss": 0.4909, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.469566562481503e-05, "epoch": 1.0582456140350878, "percentage": 21.16, "elapsed_time": "0:50:43", "remaining_time": "3:08:57"} +{"current_steps": 7550, "total_steps": 35625, "loss": 0.5369, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.4682072102862286e-05, "epoch": 1.0596491228070175, "percentage": 21.19, "elapsed_time": "0:50:46", "remaining_time": "3:08:50"} +{"current_steps": 7560, "total_steps": 35625, "loss": 0.5046, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.466846325774179e-05, "epoch": 1.0610526315789475, "percentage": 21.22, "elapsed_time": "0:50:51", "remaining_time": "3:08:48"} +{"current_steps": 7570, "total_steps": 35625, "loss": 0.5201, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.4654839100048535e-05, "epoch": 1.0624561403508772, "percentage": 21.25, "elapsed_time": "0:50:56", "remaining_time": "3:08:45"} +{"current_steps": 7580, "total_steps": 35625, "loss": 0.5238, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.464119964038937e-05, "epoch": 1.063859649122807, "percentage": 21.28, "elapsed_time": "0:50:59", "remaining_time": "3:08:39"} +{"current_steps": 7590, "total_steps": 35625, "loss": 0.5074, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.462754488938309e-05, "epoch": 1.065263157894737, "percentage": 21.31, "elapsed_time": "0:51:03", "remaining_time": "3:08:33"} +{"current_steps": 7600, "total_steps": 35625, "loss": 0.5297, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.4613874857660384e-05, "epoch": 1.0666666666666667, "percentage": 21.33, "elapsed_time": "0:51:06", "remaining_time": "3:08:27"} +{"current_steps": 7610, "total_steps": 35625, "loss": 0.5585, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.460018955586384e-05, "epoch": 1.0680701754385964, "percentage": 21.36, "elapsed_time": "0:51:11", "remaining_time": "3:08:28"} +{"current_steps": 7620, "total_steps": 35625, "loss": 0.4944, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.458648899464793e-05, "epoch": 1.0694736842105264, "percentage": 21.39, "elapsed_time": "0:51:16", "remaining_time": "3:08:27"} +{"current_steps": 7630, "total_steps": 35625, "loss": 0.5736, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.457277318467903e-05, "epoch": 1.070877192982456, "percentage": 21.42, "elapsed_time": "0:51:21", "remaining_time": "3:08:25"} +{"current_steps": 7640, "total_steps": 35625, "loss": 0.6152, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.4559042136635345e-05, "epoch": 1.072280701754386, "percentage": 21.45, "elapsed_time": "0:51:26", "remaining_time": "3:08:23"} +{"current_steps": 7650, "total_steps": 35625, "loss": 0.4936, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.4545295861206975e-05, "epoch": 1.0736842105263158, "percentage": 21.47, "elapsed_time": "0:51:29", "remaining_time": "3:08:16"} +{"current_steps": 7660, "total_steps": 35625, "loss": 0.5547, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.453153436909587e-05, "epoch": 1.0750877192982455, "percentage": 21.5, "elapsed_time": "0:51:32", "remaining_time": "3:08:11"} +{"current_steps": 7670, "total_steps": 35625, "loss": 0.537, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.4517757671015826e-05, "epoch": 1.0764912280701755, "percentage": 21.53, "elapsed_time": "0:51:36", "remaining_time": "3:08:05"} +{"current_steps": 7680, "total_steps": 35625, "loss": 0.5131, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.4503965777692456e-05, "epoch": 1.0778947368421052, "percentage": 21.56, "elapsed_time": "0:51:39", "remaining_time": "3:07:58"} +{"current_steps": 7690, "total_steps": 35625, "loss": 0.4782, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.449015869986325e-05, "epoch": 1.079298245614035, "percentage": 21.59, "elapsed_time": "0:51:44", "remaining_time": "3:07:56"} +{"current_steps": 7700, "total_steps": 35625, "loss": 0.4962, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.447633644827747e-05, "epoch": 1.080701754385965, "percentage": 21.61, "elapsed_time": "0:51:47", "remaining_time": "3:07:49"} +{"current_steps": 7710, "total_steps": 35625, "loss": 0.5025, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.446249903369621e-05, "epoch": 1.0821052631578947, "percentage": 21.64, "elapsed_time": "0:51:51", "remaining_time": "3:07:46"} +{"current_steps": 7720, "total_steps": 35625, "loss": 0.4816, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.444864646689239e-05, "epoch": 1.0835087719298246, "percentage": 21.67, "elapsed_time": "0:51:57", "remaining_time": "3:07:47"} +{"current_steps": 7730, "total_steps": 35625, "loss": 0.4762, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.443477875865071e-05, "epoch": 1.0849122807017544, "percentage": 21.7, "elapsed_time": "0:52:04", "remaining_time": "3:07:53"} +{"current_steps": 7740, "total_steps": 35625, "loss": 0.4501, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.4420895919767626e-05, "epoch": 1.0863157894736841, "percentage": 21.73, "elapsed_time": "0:52:07", "remaining_time": "3:07:46"} +{"current_steps": 7750, "total_steps": 35625, "loss": 0.4855, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.440699796105143e-05, "epoch": 1.087719298245614, "percentage": 21.75, "elapsed_time": "0:52:11", "remaining_time": "3:07:42"} +{"current_steps": 7760, "total_steps": 35625, "loss": 0.5558, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.439308489332215e-05, "epoch": 1.0891228070175438, "percentage": 21.78, "elapsed_time": "0:52:15", "remaining_time": "3:07:39"} +{"current_steps": 7770, "total_steps": 35625, "loss": 0.5219, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.437915672741158e-05, "epoch": 1.0905263157894738, "percentage": 21.81, "elapsed_time": "0:52:19", "remaining_time": "3:07:34"} +{"current_steps": 7780, "total_steps": 35625, "loss": 0.4643, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.43652134741633e-05, "epoch": 1.0919298245614035, "percentage": 21.84, "elapsed_time": "0:52:22", "remaining_time": "3:07:28"} +{"current_steps": 7790, "total_steps": 35625, "loss": 0.562, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.435125514443258e-05, "epoch": 1.0933333333333333, "percentage": 21.87, "elapsed_time": "0:52:25", "remaining_time": "3:07:20"} +{"current_steps": 7800, "total_steps": 35625, "loss": 0.5022, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.4337281749086477e-05, "epoch": 1.0947368421052632, "percentage": 21.89, "elapsed_time": "0:52:29", "remaining_time": "3:07:16"} +{"current_steps": 7810, "total_steps": 35625, "loss": 0.5462, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.432329329900375e-05, "epoch": 1.096140350877193, "percentage": 21.92, "elapsed_time": "0:52:33", "remaining_time": "3:07:09"} +{"current_steps": 7820, "total_steps": 35625, "loss": 0.5103, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.4309289805074895e-05, "epoch": 1.0975438596491227, "percentage": 21.95, "elapsed_time": "0:52:36", "remaining_time": "3:07:02"} +{"current_steps": 7830, "total_steps": 35625, "loss": 0.5588, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.42952712782021e-05, "epoch": 1.0989473684210527, "percentage": 21.98, "elapsed_time": "0:52:39", "remaining_time": "3:06:56"} +{"current_steps": 7840, "total_steps": 35625, "loss": 0.5107, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.428123772929928e-05, "epoch": 1.1003508771929824, "percentage": 22.01, "elapsed_time": "0:52:43", "remaining_time": "3:06:52"} +{"current_steps": 7850, "total_steps": 35625, "loss": 0.463, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.426718916929202e-05, "epoch": 1.1017543859649124, "percentage": 22.04, "elapsed_time": "0:52:47", "remaining_time": "3:06:48"} +{"current_steps": 7860, "total_steps": 35625, "loss": 0.4801, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.425312560911762e-05, "epoch": 1.1031578947368421, "percentage": 22.06, "elapsed_time": "0:52:52", "remaining_time": "3:06:45"} +{"current_steps": 7870, "total_steps": 35625, "loss": 0.4755, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.4239047059725035e-05, "epoch": 1.1045614035087719, "percentage": 22.09, "elapsed_time": "0:52:56", "remaining_time": "3:06:42"} +{"current_steps": 7880, "total_steps": 35625, "loss": 0.4449, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.422495353207491e-05, "epoch": 1.1059649122807018, "percentage": 22.12, "elapsed_time": "0:53:00", "remaining_time": "3:06:39"} +{"current_steps": 7890, "total_steps": 35625, "loss": 0.4613, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.4210845037139525e-05, "epoch": 1.1073684210526316, "percentage": 22.15, "elapsed_time": "0:53:05", "remaining_time": "3:06:39"} +{"current_steps": 7900, "total_steps": 35625, "loss": 0.6132, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.419672158590282e-05, "epoch": 1.1087719298245613, "percentage": 22.18, "elapsed_time": "0:53:09", "remaining_time": "3:06:33"} +{"current_steps": 7910, "total_steps": 35625, "loss": 0.5235, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.4182583189360415e-05, "epoch": 1.1101754385964913, "percentage": 22.2, "elapsed_time": "0:53:14", "remaining_time": "3:06:31"} +{"current_steps": 7920, "total_steps": 35625, "loss": 0.5066, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.416842985851951e-05, "epoch": 1.111578947368421, "percentage": 22.23, "elapsed_time": "0:53:17", "remaining_time": "3:06:25"} +{"current_steps": 7930, "total_steps": 35625, "loss": 0.5148, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.415426160439897e-05, "epoch": 1.112982456140351, "percentage": 22.26, "elapsed_time": "0:53:20", "remaining_time": "3:06:18"} +{"current_steps": 7940, "total_steps": 35625, "loss": 0.4731, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.414007843802927e-05, "epoch": 1.1143859649122807, "percentage": 22.29, "elapsed_time": "0:53:24", "remaining_time": "3:06:12"} +{"current_steps": 7950, "total_steps": 35625, "loss": 0.4657, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.412588037045248e-05, "epoch": 1.1157894736842104, "percentage": 22.32, "elapsed_time": "0:53:28", "remaining_time": "3:06:08"} +{"current_steps": 7960, "total_steps": 35625, "loss": 0.5292, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.411166741272228e-05, "epoch": 1.1171929824561404, "percentage": 22.34, "elapsed_time": "0:53:31", "remaining_time": "3:06:00"} +{"current_steps": 7970, "total_steps": 35625, "loss": 0.5086, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.4097439575903964e-05, "epoch": 1.1185964912280701, "percentage": 22.37, "elapsed_time": "0:53:35", "remaining_time": "3:05:56"} +{"current_steps": 7980, "total_steps": 35625, "loss": 0.4074, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.408319687107437e-05, "epoch": 1.12, "percentage": 22.4, "elapsed_time": "0:53:40", "remaining_time": "3:05:57"} +{"current_steps": 7990, "total_steps": 35625, "loss": 0.5302, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.406893930932195e-05, "epoch": 1.1214035087719298, "percentage": 22.43, "elapsed_time": "0:53:43", "remaining_time": "3:05:50"} +{"current_steps": 8000, "total_steps": 35625, "loss": 0.5533, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.4054666901746685e-05, "epoch": 1.1228070175438596, "percentage": 22.46, "elapsed_time": "0:53:47", "remaining_time": "3:05:43"} +{"current_steps": 8000, "total_steps": 35625, "loss": null, "eval_loss": 0.6547604203224182, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": null, "epoch": 1.1228070175438596, "percentage": 22.46, "elapsed_time": "0:53:47", "remaining_time": "3:05:43"} +{"current_steps": 8010, "total_steps": 35625, "loss": 0.5533, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.404037965946015e-05, "epoch": 1.1242105263157895, "percentage": 22.48, "elapsed_time": "0:54:34", "remaining_time": "3:08:10"} +{"current_steps": 8020, "total_steps": 35625, "loss": 0.573, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.402607759358545e-05, "epoch": 1.1256140350877193, "percentage": 22.51, "elapsed_time": "0:54:38", "remaining_time": "3:08:04"} +{"current_steps": 8030, "total_steps": 35625, "loss": 0.6002, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.401176071525722e-05, "epoch": 1.127017543859649, "percentage": 22.54, "elapsed_time": "0:54:41", "remaining_time": "3:07:57"} +{"current_steps": 8040, "total_steps": 35625, "loss": 0.5412, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.399742903562166e-05, "epoch": 1.128421052631579, "percentage": 22.57, "elapsed_time": "0:54:45", "remaining_time": "3:07:52"} +{"current_steps": 8050, "total_steps": 35625, "loss": 0.5516, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.3983082565836454e-05, "epoch": 1.1298245614035087, "percentage": 22.6, "elapsed_time": "0:54:48", "remaining_time": "3:07:45"} +{"current_steps": 8060, "total_steps": 35625, "loss": 0.5142, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.3968721317070835e-05, "epoch": 1.1312280701754387, "percentage": 22.62, "elapsed_time": "0:54:52", "remaining_time": "3:07:39"} +{"current_steps": 8070, "total_steps": 35625, "loss": 0.4974, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.395434530050553e-05, "epoch": 1.1326315789473684, "percentage": 22.65, "elapsed_time": "0:54:56", "remaining_time": "3:07:34"} +{"current_steps": 8080, "total_steps": 35625, "loss": 0.5921, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.393995452733274e-05, "epoch": 1.1340350877192982, "percentage": 22.68, "elapsed_time": "0:54:59", "remaining_time": "3:07:27"} +{"current_steps": 8090, "total_steps": 35625, "loss": 0.5516, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.392554900875619e-05, "epoch": 1.1354385964912281, "percentage": 22.71, "elapsed_time": "0:55:04", "remaining_time": "3:07:26"} +{"current_steps": 8100, "total_steps": 35625, "loss": 0.4377, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.3911128755991085e-05, "epoch": 1.1368421052631579, "percentage": 22.74, "elapsed_time": "0:55:08", "remaining_time": "3:07:22"} +{"current_steps": 8110, "total_steps": 35625, "loss": 0.5489, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.3896693780264054e-05, "epoch": 1.1382456140350876, "percentage": 22.76, "elapsed_time": "0:55:11", "remaining_time": "3:07:16"} +{"current_steps": 8120, "total_steps": 35625, "loss": 0.4883, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.388224409281324e-05, "epoch": 1.1396491228070176, "percentage": 22.79, "elapsed_time": "0:55:15", "remaining_time": "3:07:12"} +{"current_steps": 8130, "total_steps": 35625, "loss": 0.5316, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.3867779704888225e-05, "epoch": 1.1410526315789473, "percentage": 22.82, "elapsed_time": "0:55:19", "remaining_time": "3:07:07"} +{"current_steps": 8140, "total_steps": 35625, "loss": 0.5961, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.385330062775001e-05, "epoch": 1.1424561403508773, "percentage": 22.85, "elapsed_time": "0:55:23", "remaining_time": "3:07:00"} +{"current_steps": 8150, "total_steps": 35625, "loss": 0.5839, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.383880687267107e-05, "epoch": 1.143859649122807, "percentage": 22.88, "elapsed_time": "0:55:27", "remaining_time": "3:06:59"} +{"current_steps": 8160, "total_steps": 35625, "loss": 0.4834, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.3824298450935284e-05, "epoch": 1.1452631578947368, "percentage": 22.91, "elapsed_time": "0:55:32", "remaining_time": "3:06:56"} +{"current_steps": 8170, "total_steps": 35625, "loss": 0.5543, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.380977537383796e-05, "epoch": 1.1466666666666667, "percentage": 22.93, "elapsed_time": "0:55:35", "remaining_time": "3:06:49"} +{"current_steps": 8180, "total_steps": 35625, "loss": 0.5548, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.37952376526858e-05, "epoch": 1.1480701754385965, "percentage": 22.96, "elapsed_time": "0:55:38", "remaining_time": "3:06:42"} +{"current_steps": 8190, "total_steps": 35625, "loss": 0.4596, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.378068529879693e-05, "epoch": 1.1494736842105264, "percentage": 22.99, "elapsed_time": "0:55:43", "remaining_time": "3:06:38"} +{"current_steps": 8200, "total_steps": 35625, "loss": 0.5165, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.376611832350085e-05, "epoch": 1.1508771929824562, "percentage": 23.02, "elapsed_time": "0:55:46", "remaining_time": "3:06:33"} +{"current_steps": 8210, "total_steps": 35625, "loss": 0.5085, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.3751536738138454e-05, "epoch": 1.152280701754386, "percentage": 23.05, "elapsed_time": "0:55:50", "remaining_time": "3:06:28"} +{"current_steps": 8220, "total_steps": 35625, "loss": 0.5485, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.3736940554062e-05, "epoch": 1.1536842105263159, "percentage": 23.07, "elapsed_time": "0:55:56", "remaining_time": "3:06:28"} +{"current_steps": 8230, "total_steps": 35625, "loss": 0.5049, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.372232978263513e-05, "epoch": 1.1550877192982456, "percentage": 23.1, "elapsed_time": "0:56:00", "remaining_time": "3:06:25"} +{"current_steps": 8240, "total_steps": 35625, "loss": 0.4833, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.3707704435232816e-05, "epoch": 1.1564912280701753, "percentage": 23.13, "elapsed_time": "0:56:03", "remaining_time": "3:06:17"} +{"current_steps": 8250, "total_steps": 35625, "loss": 0.5616, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.36930645232414e-05, "epoch": 1.1578947368421053, "percentage": 23.16, "elapsed_time": "0:56:06", "remaining_time": "3:06:11"} +{"current_steps": 8260, "total_steps": 35625, "loss": 0.5448, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.367841005805855e-05, "epoch": 1.159298245614035, "percentage": 23.19, "elapsed_time": "0:56:10", "remaining_time": "3:06:07"} +{"current_steps": 8270, "total_steps": 35625, "loss": 0.4714, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.366374105109327e-05, "epoch": 1.1607017543859648, "percentage": 23.21, "elapsed_time": "0:56:14", "remaining_time": "3:06:02"} +{"current_steps": 8280, "total_steps": 35625, "loss": 0.5994, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.364905751376589e-05, "epoch": 1.1621052631578948, "percentage": 23.24, "elapsed_time": "0:56:17", "remaining_time": "3:05:55"} +{"current_steps": 8290, "total_steps": 35625, "loss": 0.5633, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.3634359457508046e-05, "epoch": 1.1635087719298245, "percentage": 23.27, "elapsed_time": "0:56:21", "remaining_time": "3:05:48"} +{"current_steps": 8300, "total_steps": 35625, "loss": 0.5566, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.3619646893762675e-05, "epoch": 1.1649122807017545, "percentage": 23.3, "elapsed_time": "0:56:25", "remaining_time": "3:05:45"} +{"current_steps": 8310, "total_steps": 35625, "loss": 0.4631, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.360491983398402e-05, "epoch": 1.1663157894736842, "percentage": 23.33, "elapsed_time": "0:56:29", "remaining_time": "3:05:41"} +{"current_steps": 8320, "total_steps": 35625, "loss": 0.4525, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.3590178289637585e-05, "epoch": 1.167719298245614, "percentage": 23.35, "elapsed_time": "0:56:32", "remaining_time": "3:05:34"} +{"current_steps": 8330, "total_steps": 35625, "loss": 0.4501, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.357542227220019e-05, "epoch": 1.169122807017544, "percentage": 23.38, "elapsed_time": "0:56:38", "remaining_time": "3:05:34"} +{"current_steps": 8340, "total_steps": 35625, "loss": 0.561, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.356065179315988e-05, "epoch": 1.1705263157894736, "percentage": 23.41, "elapsed_time": "0:56:42", "remaining_time": "3:05:31"} +{"current_steps": 8350, "total_steps": 35625, "loss": 0.513, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.354586686401599e-05, "epoch": 1.1719298245614036, "percentage": 23.44, "elapsed_time": "0:56:46", "remaining_time": "3:05:26"} +{"current_steps": 8360, "total_steps": 35625, "loss": 0.4915, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.353106749627909e-05, "epoch": 1.1733333333333333, "percentage": 23.47, "elapsed_time": "0:56:49", "remaining_time": "3:05:19"} +{"current_steps": 8370, "total_steps": 35625, "loss": 0.5193, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.3516253701471e-05, "epoch": 1.174736842105263, "percentage": 23.49, "elapsed_time": "0:56:52", "remaining_time": "3:05:12"} +{"current_steps": 8380, "total_steps": 35625, "loss": 0.5881, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.350142549112476e-05, "epoch": 1.176140350877193, "percentage": 23.52, "elapsed_time": "0:56:57", "remaining_time": "3:05:09"} +{"current_steps": 8390, "total_steps": 35625, "loss": 0.5378, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.348658287678465e-05, "epoch": 1.1775438596491228, "percentage": 23.55, "elapsed_time": "0:57:01", "remaining_time": "3:05:05"} +{"current_steps": 8400, "total_steps": 35625, "loss": 0.5356, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.347172587000614e-05, "epoch": 1.1789473684210527, "percentage": 23.58, "elapsed_time": "0:57:04", "remaining_time": "3:05:00"} +{"current_steps": 8410, "total_steps": 35625, "loss": 0.5849, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.345685448235594e-05, "epoch": 1.1803508771929825, "percentage": 23.61, "elapsed_time": "0:57:10", "remaining_time": "3:05:01"} +{"current_steps": 8420, "total_steps": 35625, "loss": 0.5157, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.3441968725411905e-05, "epoch": 1.1817543859649122, "percentage": 23.64, "elapsed_time": "0:57:14", "remaining_time": "3:04:56"} +{"current_steps": 8430, "total_steps": 35625, "loss": 0.6508, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.342706861076313e-05, "epoch": 1.1831578947368422, "percentage": 23.66, "elapsed_time": "0:57:18", "remaining_time": "3:04:52"} +{"current_steps": 8440, "total_steps": 35625, "loss": 0.525, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.341215415000987e-05, "epoch": 1.184561403508772, "percentage": 23.69, "elapsed_time": "0:57:22", "remaining_time": "3:04:47"} +{"current_steps": 8450, "total_steps": 35625, "loss": 0.6218, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.339722535476353e-05, "epoch": 1.1859649122807017, "percentage": 23.72, "elapsed_time": "0:57:25", "remaining_time": "3:04:39"} +{"current_steps": 8460, "total_steps": 35625, "loss": 0.5375, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.3382282236646684e-05, "epoch": 1.1873684210526316, "percentage": 23.75, "elapsed_time": "0:57:28", "remaining_time": "3:04:32"} +{"current_steps": 8470, "total_steps": 35625, "loss": 0.5454, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.336732480729306e-05, "epoch": 1.1887719298245614, "percentage": 23.78, "elapsed_time": "0:57:32", "remaining_time": "3:04:27"} +{"current_steps": 8480, "total_steps": 35625, "loss": 0.5507, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.335235307834755e-05, "epoch": 1.190175438596491, "percentage": 23.8, "elapsed_time": "0:57:35", "remaining_time": "3:04:21"} +{"current_steps": 8490, "total_steps": 35625, "loss": 0.5172, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.333736706146615e-05, "epoch": 1.191578947368421, "percentage": 23.83, "elapsed_time": "0:57:39", "remaining_time": "3:04:16"} +{"current_steps": 8500, "total_steps": 35625, "loss": 0.5401, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.332236676831598e-05, "epoch": 1.1929824561403508, "percentage": 23.86, "elapsed_time": "0:57:44", "remaining_time": "3:04:15"} +{"current_steps": 8510, "total_steps": 35625, "loss": 0.4734, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.330735221057529e-05, "epoch": 1.1943859649122808, "percentage": 23.89, "elapsed_time": "0:57:48", "remaining_time": "3:04:11"} +{"current_steps": 8520, "total_steps": 35625, "loss": 0.4783, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.329232339993342e-05, "epoch": 1.1957894736842105, "percentage": 23.92, "elapsed_time": "0:57:53", "remaining_time": "3:04:11"} +{"current_steps": 8530, "total_steps": 35625, "loss": 0.5843, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.327728034809082e-05, "epoch": 1.1971929824561403, "percentage": 23.94, "elapsed_time": "0:57:56", "remaining_time": "3:04:03"} +{"current_steps": 8540, "total_steps": 35625, "loss": 0.4922, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.326222306675902e-05, "epoch": 1.1985964912280702, "percentage": 23.97, "elapsed_time": "0:58:00", "remaining_time": "3:03:58"} +{"current_steps": 8550, "total_steps": 35625, "loss": 0.6196, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.324715156766064e-05, "epoch": 1.2, "percentage": 24.0, "elapsed_time": "0:58:04", "remaining_time": "3:03:53"} +{"current_steps": 8560, "total_steps": 35625, "loss": 0.4713, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.3232065862529334e-05, "epoch": 1.20140350877193, "percentage": 24.03, "elapsed_time": "0:58:08", "remaining_time": "3:03:48"} +{"current_steps": 8570, "total_steps": 35625, "loss": 0.5015, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.321696596310987e-05, "epoch": 1.2028070175438597, "percentage": 24.06, "elapsed_time": "0:58:11", "remaining_time": "3:03:42"} +{"current_steps": 8580, "total_steps": 35625, "loss": 0.569, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.3201851881158004e-05, "epoch": 1.2042105263157894, "percentage": 24.08, "elapsed_time": "0:58:16", "remaining_time": "3:03:42"} +{"current_steps": 8590, "total_steps": 35625, "loss": 0.5079, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.31867236284406e-05, "epoch": 1.2056140350877194, "percentage": 24.11, "elapsed_time": "0:58:22", "remaining_time": "3:03:42"} +{"current_steps": 8600, "total_steps": 35625, "loss": 0.5132, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.31715812167355e-05, "epoch": 1.207017543859649, "percentage": 24.14, "elapsed_time": "0:58:25", "remaining_time": "3:03:37"} +{"current_steps": 8610, "total_steps": 35625, "loss": 0.5907, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.3156424657831596e-05, "epoch": 1.208421052631579, "percentage": 24.17, "elapsed_time": "0:58:29", "remaining_time": "3:03:30"} +{"current_steps": 8620, "total_steps": 35625, "loss": 0.6086, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.3141253963528795e-05, "epoch": 1.2098245614035088, "percentage": 24.2, "elapsed_time": "0:58:33", "remaining_time": "3:03:26"} +{"current_steps": 8630, "total_steps": 35625, "loss": 0.4966, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.3126069145637987e-05, "epoch": 1.2112280701754385, "percentage": 24.22, "elapsed_time": "0:58:37", "remaining_time": "3:03:22"} +{"current_steps": 8640, "total_steps": 35625, "loss": 0.5713, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.3110870215981095e-05, "epoch": 1.2126315789473685, "percentage": 24.25, "elapsed_time": "0:58:42", "remaining_time": "3:03:20"} +{"current_steps": 8650, "total_steps": 35625, "loss": 0.5538, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.309565718639098e-05, "epoch": 1.2140350877192982, "percentage": 24.28, "elapsed_time": "0:58:45", "remaining_time": "3:03:14"} +{"current_steps": 8660, "total_steps": 35625, "loss": 0.5065, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.308043006871153e-05, "epoch": 1.215438596491228, "percentage": 24.31, "elapsed_time": "0:58:49", "remaining_time": "3:03:09"} +{"current_steps": 8670, "total_steps": 35625, "loss": 0.495, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.306518887479758e-05, "epoch": 1.216842105263158, "percentage": 24.34, "elapsed_time": "0:58:52", "remaining_time": "3:03:03"} +{"current_steps": 8680, "total_steps": 35625, "loss": 0.5217, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.3049933616514895e-05, "epoch": 1.2182456140350877, "percentage": 24.36, "elapsed_time": "0:58:57", "remaining_time": "3:02:59"} +{"current_steps": 8690, "total_steps": 35625, "loss": 0.5196, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.303466430574024e-05, "epoch": 1.2196491228070174, "percentage": 24.39, "elapsed_time": "0:59:01", "remaining_time": "3:02:57"} +{"current_steps": 8700, "total_steps": 35625, "loss": 0.4687, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.301938095436129e-05, "epoch": 1.2210526315789474, "percentage": 24.42, "elapsed_time": "0:59:05", "remaining_time": "3:02:53"} +{"current_steps": 8710, "total_steps": 35625, "loss": 0.6043, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.300408357427666e-05, "epoch": 1.2224561403508771, "percentage": 24.45, "elapsed_time": "0:59:09", "remaining_time": "3:02:47"} +{"current_steps": 8720, "total_steps": 35625, "loss": 0.5359, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.298877217739587e-05, "epoch": 1.223859649122807, "percentage": 24.48, "elapsed_time": "0:59:12", "remaining_time": "3:02:41"} +{"current_steps": 8730, "total_steps": 35625, "loss": 0.4502, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.29734467756394e-05, "epoch": 1.2252631578947368, "percentage": 24.51, "elapsed_time": "0:59:17", "remaining_time": "3:02:38"} +{"current_steps": 8740, "total_steps": 35625, "loss": 0.4823, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.2958107380938564e-05, "epoch": 1.2266666666666666, "percentage": 24.53, "elapsed_time": "0:59:20", "remaining_time": "3:02:33"} +{"current_steps": 8750, "total_steps": 35625, "loss": 0.4295, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.294275400523564e-05, "epoch": 1.2280701754385965, "percentage": 24.56, "elapsed_time": "0:59:23", "remaining_time": "3:02:26"} +{"current_steps": 8760, "total_steps": 35625, "loss": 0.4983, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.2927386660483726e-05, "epoch": 1.2294736842105263, "percentage": 24.59, "elapsed_time": "0:59:27", "remaining_time": "3:02:19"} +{"current_steps": 8770, "total_steps": 35625, "loss": 0.5405, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.291200535864684e-05, "epoch": 1.2308771929824562, "percentage": 24.62, "elapsed_time": "0:59:31", "remaining_time": "3:02:15"} +{"current_steps": 8780, "total_steps": 35625, "loss": 0.5094, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.289661011169986e-05, "epoch": 1.232280701754386, "percentage": 24.65, "elapsed_time": "0:59:36", "remaining_time": "3:02:15"} +{"current_steps": 8790, "total_steps": 35625, "loss": 0.5622, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.28812009316285e-05, "epoch": 1.2336842105263157, "percentage": 24.67, "elapsed_time": "0:59:39", "remaining_time": "3:02:08"} +{"current_steps": 8800, "total_steps": 35625, "loss": 0.4577, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.286577783042934e-05, "epoch": 1.2350877192982457, "percentage": 24.7, "elapsed_time": "0:59:43", "remaining_time": "3:02:02"} +{"current_steps": 8810, "total_steps": 35625, "loss": 0.6015, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.285034082010981e-05, "epoch": 1.2364912280701754, "percentage": 24.73, "elapsed_time": "0:59:47", "remaining_time": "3:01:57"} +{"current_steps": 8820, "total_steps": 35625, "loss": 0.5716, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.2834889912688126e-05, "epoch": 1.2378947368421054, "percentage": 24.76, "elapsed_time": "0:59:50", "remaining_time": "3:01:52"} +{"current_steps": 8830, "total_steps": 35625, "loss": 0.5634, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.281942512019336e-05, "epoch": 1.2392982456140351, "percentage": 24.79, "elapsed_time": "0:59:54", "remaining_time": "3:01:46"} +{"current_steps": 8840, "total_steps": 35625, "loss": 0.4982, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.2803946454665376e-05, "epoch": 1.2407017543859649, "percentage": 24.81, "elapsed_time": "0:59:57", "remaining_time": "3:01:39"} +{"current_steps": 8850, "total_steps": 35625, "loss": 0.5006, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.2788453928154855e-05, "epoch": 1.2421052631578948, "percentage": 24.84, "elapsed_time": "1:00:00", "remaining_time": "3:01:32"} +{"current_steps": 8860, "total_steps": 35625, "loss": 0.4901, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.2772947552723266e-05, "epoch": 1.2435087719298246, "percentage": 24.87, "elapsed_time": "1:00:05", "remaining_time": "3:01:30"} +{"current_steps": 8870, "total_steps": 35625, "loss": 0.4847, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.275742734044283e-05, "epoch": 1.2449122807017543, "percentage": 24.9, "elapsed_time": "1:00:09", "remaining_time": "3:01:27"} +{"current_steps": 8880, "total_steps": 35625, "loss": 0.5224, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.274189330339658e-05, "epoch": 1.2463157894736843, "percentage": 24.93, "elapsed_time": "1:00:15", "remaining_time": "3:01:28"} +{"current_steps": 8890, "total_steps": 35625, "loss": 0.4698, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.272634545367831e-05, "epoch": 1.247719298245614, "percentage": 24.95, "elapsed_time": "1:00:19", "remaining_time": "3:01:23"} +{"current_steps": 8900, "total_steps": 35625, "loss": 0.5801, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.271078380339252e-05, "epoch": 1.2491228070175437, "percentage": 24.98, "elapsed_time": "1:00:23", "remaining_time": "3:01:19"} +{"current_steps": 8910, "total_steps": 35625, "loss": 0.6584, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.269520836465452e-05, "epoch": 1.2505263157894737, "percentage": 25.01, "elapsed_time": "1:00:26", "remaining_time": "3:01:13"} +{"current_steps": 8920, "total_steps": 35625, "loss": 0.5752, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.2679619149590304e-05, "epoch": 1.2519298245614034, "percentage": 25.04, "elapsed_time": "1:00:29", "remaining_time": "3:01:05"} +{"current_steps": 8930, "total_steps": 35625, "loss": 0.4829, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.266401617033662e-05, "epoch": 1.2533333333333334, "percentage": 25.07, "elapsed_time": "1:00:32", "remaining_time": "3:00:59"} +{"current_steps": 8940, "total_steps": 35625, "loss": 0.5411, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.264839943904091e-05, "epoch": 1.2547368421052632, "percentage": 25.09, "elapsed_time": "1:00:36", "remaining_time": "3:00:53"} +{"current_steps": 8950, "total_steps": 35625, "loss": 0.5089, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.2632768967861345e-05, "epoch": 1.256140350877193, "percentage": 25.12, "elapsed_time": "1:00:40", "remaining_time": "3:00:51"} +{"current_steps": 8960, "total_steps": 35625, "loss": 0.6257, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.261712476896679e-05, "epoch": 1.2575438596491229, "percentage": 25.15, "elapsed_time": "1:00:45", "remaining_time": "3:00:49"} +{"current_steps": 8970, "total_steps": 35625, "loss": 0.5403, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.2601466854536774e-05, "epoch": 1.2589473684210526, "percentage": 25.18, "elapsed_time": "1:00:48", "remaining_time": "3:00:42"} +{"current_steps": 8980, "total_steps": 35625, "loss": 0.5305, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.2585795236761526e-05, "epoch": 1.2603508771929826, "percentage": 25.21, "elapsed_time": "1:00:52", "remaining_time": "3:00:37"} +{"current_steps": 8990, "total_steps": 35625, "loss": 0.5776, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.257010992784194e-05, "epoch": 1.2617543859649123, "percentage": 25.24, "elapsed_time": "1:00:56", "remaining_time": "3:00:31"} +{"current_steps": 9000, "total_steps": 35625, "loss": 0.5772, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.255441093998956e-05, "epoch": 1.263157894736842, "percentage": 25.26, "elapsed_time": "1:00:59", "remaining_time": "3:00:25"} +{"current_steps": 9010, "total_steps": 35625, "loss": 0.589, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.253869828542659e-05, "epoch": 1.264561403508772, "percentage": 25.29, "elapsed_time": "1:01:02", "remaining_time": "3:00:19"} +{"current_steps": 9020, "total_steps": 35625, "loss": 0.5012, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.2522971976385876e-05, "epoch": 1.2659649122807017, "percentage": 25.32, "elapsed_time": "1:01:06", "remaining_time": "3:00:15"} +{"current_steps": 9030, "total_steps": 35625, "loss": 0.4813, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.250723202511089e-05, "epoch": 1.2673684210526317, "percentage": 25.35, "elapsed_time": "1:01:10", "remaining_time": "3:00:11"} +{"current_steps": 9040, "total_steps": 35625, "loss": 0.513, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.2491478443855704e-05, "epoch": 1.2687719298245614, "percentage": 25.38, "elapsed_time": "1:01:13", "remaining_time": "3:00:04"} +{"current_steps": 9050, "total_steps": 35625, "loss": 0.6229, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.247571124488504e-05, "epoch": 1.2701754385964912, "percentage": 25.4, "elapsed_time": "1:01:17", "remaining_time": "2:59:57"} +{"current_steps": 9060, "total_steps": 35625, "loss": 0.5493, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.2459930440474194e-05, "epoch": 1.271578947368421, "percentage": 25.43, "elapsed_time": "1:01:20", "remaining_time": "2:59:51"} +{"current_steps": 9070, "total_steps": 35625, "loss": 0.4845, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.2444136042909064e-05, "epoch": 1.2729824561403509, "percentage": 25.46, "elapsed_time": "1:01:23", "remaining_time": "2:59:45"} +{"current_steps": 9080, "total_steps": 35625, "loss": 0.5174, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.2428328064486134e-05, "epoch": 1.2743859649122806, "percentage": 25.49, "elapsed_time": "1:01:28", "remaining_time": "2:59:42"} +{"current_steps": 9090, "total_steps": 35625, "loss": 0.501, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.2412506517512456e-05, "epoch": 1.2757894736842106, "percentage": 25.52, "elapsed_time": "1:01:31", "remaining_time": "2:59:36"} +{"current_steps": 9100, "total_steps": 35625, "loss": 0.6422, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.239667141430564e-05, "epoch": 1.2771929824561403, "percentage": 25.54, "elapsed_time": "1:01:35", "remaining_time": "2:59:31"} +{"current_steps": 9110, "total_steps": 35625, "loss": 0.5323, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.238082276719387e-05, "epoch": 1.27859649122807, "percentage": 25.57, "elapsed_time": "1:01:39", "remaining_time": "2:59:26"} +{"current_steps": 9120, "total_steps": 35625, "loss": 0.4542, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.236496058851585e-05, "epoch": 1.28, "percentage": 25.6, "elapsed_time": "1:01:43", "remaining_time": "2:59:24"} +{"current_steps": 9130, "total_steps": 35625, "loss": 0.5697, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.234908489062083e-05, "epoch": 1.2814035087719298, "percentage": 25.63, "elapsed_time": "1:01:48", "remaining_time": "2:59:20"} +{"current_steps": 9140, "total_steps": 35625, "loss": 0.5108, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.233319568586859e-05, "epoch": 1.2828070175438597, "percentage": 25.66, "elapsed_time": "1:01:51", "remaining_time": "2:59:13"} +{"current_steps": 9150, "total_steps": 35625, "loss": 0.4472, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.231729298662942e-05, "epoch": 1.2842105263157895, "percentage": 25.68, "elapsed_time": "1:01:54", "remaining_time": "2:59:06"} +{"current_steps": 9160, "total_steps": 35625, "loss": 0.62, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.230137680528411e-05, "epoch": 1.2856140350877192, "percentage": 25.71, "elapsed_time": "1:01:58", "remaining_time": "2:59:04"} +{"current_steps": 9170, "total_steps": 35625, "loss": 0.5226, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.228544715422395e-05, "epoch": 1.2870175438596492, "percentage": 25.74, "elapsed_time": "1:02:01", "remaining_time": "2:58:57"} +{"current_steps": 9180, "total_steps": 35625, "loss": 0.5492, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.2269504045850744e-05, "epoch": 1.288421052631579, "percentage": 25.77, "elapsed_time": "1:02:05", "remaining_time": "2:58:51"} +{"current_steps": 9190, "total_steps": 35625, "loss": 0.5359, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.225354749257673e-05, "epoch": 1.2898245614035089, "percentage": 25.8, "elapsed_time": "1:02:08", "remaining_time": "2:58:45"} +{"current_steps": 9200, "total_steps": 35625, "loss": 0.535, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.2237577506824624e-05, "epoch": 1.2912280701754386, "percentage": 25.82, "elapsed_time": "1:02:12", "remaining_time": "2:58:39"} +{"current_steps": 9210, "total_steps": 35625, "loss": 0.4581, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.222159410102761e-05, "epoch": 1.2926315789473684, "percentage": 25.85, "elapsed_time": "1:02:16", "remaining_time": "2:58:35"} +{"current_steps": 9220, "total_steps": 35625, "loss": 0.5109, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.220559728762933e-05, "epoch": 1.2940350877192983, "percentage": 25.88, "elapsed_time": "1:02:19", "remaining_time": "2:58:29"} +{"current_steps": 9230, "total_steps": 35625, "loss": 0.4501, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.2189587079083846e-05, "epoch": 1.295438596491228, "percentage": 25.91, "elapsed_time": "1:02:23", "remaining_time": "2:58:24"} +{"current_steps": 9240, "total_steps": 35625, "loss": 0.5574, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.217356348785565e-05, "epoch": 1.296842105263158, "percentage": 25.94, "elapsed_time": "1:02:27", "remaining_time": "2:58:20"} +{"current_steps": 9250, "total_steps": 35625, "loss": 0.5558, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.215752652641967e-05, "epoch": 1.2982456140350878, "percentage": 25.96, "elapsed_time": "1:02:30", "remaining_time": "2:58:13"} +{"current_steps": 9260, "total_steps": 35625, "loss": 0.4734, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.21414762072612e-05, "epoch": 1.2996491228070175, "percentage": 25.99, "elapsed_time": "1:02:35", "remaining_time": "2:58:11"} +{"current_steps": 9270, "total_steps": 35625, "loss": 0.574, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.2125412542876e-05, "epoch": 1.3010526315789472, "percentage": 26.02, "elapsed_time": "1:02:39", "remaining_time": "2:58:07"} +{"current_steps": 9280, "total_steps": 35625, "loss": 0.4505, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.210933554577016e-05, "epoch": 1.3024561403508772, "percentage": 26.05, "elapsed_time": "1:02:42", "remaining_time": "2:58:01"} +{"current_steps": 9290, "total_steps": 35625, "loss": 0.5021, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.209324522846018e-05, "epoch": 1.303859649122807, "percentage": 26.08, "elapsed_time": "1:02:45", "remaining_time": "2:57:54"} +{"current_steps": 9300, "total_steps": 35625, "loss": 0.4925, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.207714160347292e-05, "epoch": 1.305263157894737, "percentage": 26.11, "elapsed_time": "1:02:48", "remaining_time": "2:57:48"} +{"current_steps": 9310, "total_steps": 35625, "loss": 0.6289, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.206102468334561e-05, "epoch": 1.3066666666666666, "percentage": 26.13, "elapsed_time": "1:02:51", "remaining_time": "2:57:40"} +{"current_steps": 9320, "total_steps": 35625, "loss": 0.5387, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.2044894480625825e-05, "epoch": 1.3080701754385964, "percentage": 26.16, "elapsed_time": "1:02:57", "remaining_time": "2:57:41"} +{"current_steps": 9330, "total_steps": 35625, "loss": 0.5788, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.202875100787147e-05, "epoch": 1.3094736842105263, "percentage": 26.19, "elapsed_time": "1:03:00", "remaining_time": "2:57:35"} +{"current_steps": 9340, "total_steps": 35625, "loss": 0.5313, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.201259427765081e-05, "epoch": 1.310877192982456, "percentage": 26.22, "elapsed_time": "1:03:05", "remaining_time": "2:57:34"} +{"current_steps": 9350, "total_steps": 35625, "loss": 0.5948, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.1996424302542404e-05, "epoch": 1.312280701754386, "percentage": 26.25, "elapsed_time": "1:03:09", "remaining_time": "2:57:29"} +{"current_steps": 9360, "total_steps": 35625, "loss": 0.4913, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.198024109513512e-05, "epoch": 1.3136842105263158, "percentage": 26.27, "elapsed_time": "1:03:13", "remaining_time": "2:57:23"} +{"current_steps": 9370, "total_steps": 35625, "loss": 0.4895, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.196404466802816e-05, "epoch": 1.3150877192982455, "percentage": 26.3, "elapsed_time": "1:03:17", "remaining_time": "2:57:19"} +{"current_steps": 9380, "total_steps": 35625, "loss": 0.5537, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.194783503383098e-05, "epoch": 1.3164912280701755, "percentage": 26.33, "elapsed_time": "1:03:20", "remaining_time": "2:57:12"} +{"current_steps": 9390, "total_steps": 35625, "loss": 0.5641, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.193161220516334e-05, "epoch": 1.3178947368421052, "percentage": 26.36, "elapsed_time": "1:03:24", "remaining_time": "2:57:08"} +{"current_steps": 9400, "total_steps": 35625, "loss": 0.464, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.191537619465529e-05, "epoch": 1.3192982456140352, "percentage": 26.39, "elapsed_time": "1:03:28", "remaining_time": "2:57:05"} +{"current_steps": 9410, "total_steps": 35625, "loss": 0.4657, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.189912701494709e-05, "epoch": 1.320701754385965, "percentage": 26.41, "elapsed_time": "1:03:32", "remaining_time": "2:56:59"} +{"current_steps": 9420, "total_steps": 35625, "loss": 0.5113, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.1882864678689296e-05, "epoch": 1.3221052631578947, "percentage": 26.44, "elapsed_time": "1:03:36", "remaining_time": "2:56:57"} +{"current_steps": 9430, "total_steps": 35625, "loss": 0.5593, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.186658919854269e-05, "epoch": 1.3235087719298246, "percentage": 26.47, "elapsed_time": "1:03:40", "remaining_time": "2:56:52"} +{"current_steps": 9440, "total_steps": 35625, "loss": 0.4578, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.1850300587178304e-05, "epoch": 1.3249122807017544, "percentage": 26.5, "elapsed_time": "1:03:44", "remaining_time": "2:56:47"} +{"current_steps": 9450, "total_steps": 35625, "loss": 0.5637, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.183399885727737e-05, "epoch": 1.3263157894736843, "percentage": 26.53, "elapsed_time": "1:03:47", "remaining_time": "2:56:41"} +{"current_steps": 9460, "total_steps": 35625, "loss": 0.5491, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.181768402153135e-05, "epoch": 1.327719298245614, "percentage": 26.55, "elapsed_time": "1:03:50", "remaining_time": "2:56:35"} +{"current_steps": 9470, "total_steps": 35625, "loss": 0.5558, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.1801356092641886e-05, "epoch": 1.3291228070175438, "percentage": 26.58, "elapsed_time": "1:03:57", "remaining_time": "2:56:38"} +{"current_steps": 9480, "total_steps": 35625, "loss": 0.4543, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.178501508332085e-05, "epoch": 1.3305263157894736, "percentage": 26.61, "elapsed_time": "1:04:01", "remaining_time": "2:56:35"} +{"current_steps": 9490, "total_steps": 35625, "loss": 0.5832, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.176866100629027e-05, "epoch": 1.3319298245614035, "percentage": 26.64, "elapsed_time": "1:04:05", "remaining_time": "2:56:29"} +{"current_steps": 9500, "total_steps": 35625, "loss": 0.5378, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.175229387428235e-05, "epoch": 1.3333333333333333, "percentage": 26.67, "elapsed_time": "1:04:08", "remaining_time": "2:56:23"} +{"current_steps": 9510, "total_steps": 35625, "loss": 0.5046, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.1735913700039477e-05, "epoch": 1.3347368421052632, "percentage": 26.69, "elapsed_time": "1:04:12", "remaining_time": "2:56:20"} +{"current_steps": 9520, "total_steps": 35625, "loss": 0.5171, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.171952049631416e-05, "epoch": 1.336140350877193, "percentage": 26.72, "elapsed_time": "1:04:17", "remaining_time": "2:56:18"} +{"current_steps": 9530, "total_steps": 35625, "loss": 0.5939, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.170311427586908e-05, "epoch": 1.3375438596491227, "percentage": 26.75, "elapsed_time": "1:04:21", "remaining_time": "2:56:13"} +{"current_steps": 9540, "total_steps": 35625, "loss": 0.5768, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.168669505147705e-05, "epoch": 1.3389473684210527, "percentage": 26.78, "elapsed_time": "1:04:25", "remaining_time": "2:56:09"} +{"current_steps": 9550, "total_steps": 35625, "loss": 0.5029, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.1670262835920996e-05, "epoch": 1.3403508771929824, "percentage": 26.81, "elapsed_time": "1:04:29", "remaining_time": "2:56:05"} +{"current_steps": 9560, "total_steps": 35625, "loss": 0.4611, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.1653817641993936e-05, "epoch": 1.3417543859649124, "percentage": 26.84, "elapsed_time": "1:04:33", "remaining_time": "2:56:00"} +{"current_steps": 9570, "total_steps": 35625, "loss": 0.5701, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.163735948249905e-05, "epoch": 1.343157894736842, "percentage": 26.86, "elapsed_time": "1:04:37", "remaining_time": "2:55:57"} +{"current_steps": 9580, "total_steps": 35625, "loss": 0.5356, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.162088837024956e-05, "epoch": 1.3445614035087718, "percentage": 26.89, "elapsed_time": "1:04:41", "remaining_time": "2:55:51"} +{"current_steps": 9590, "total_steps": 35625, "loss": 0.4985, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.16044043180688e-05, "epoch": 1.3459649122807018, "percentage": 26.92, "elapsed_time": "1:04:45", "remaining_time": "2:55:48"} +{"current_steps": 9600, "total_steps": 35625, "loss": 0.5036, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.158790733879017e-05, "epoch": 1.3473684210526315, "percentage": 26.95, "elapsed_time": "1:04:48", "remaining_time": "2:55:41"} +{"current_steps": 9610, "total_steps": 35625, "loss": 0.5212, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.1571397445257124e-05, "epoch": 1.3487719298245615, "percentage": 26.98, "elapsed_time": "1:04:52", "remaining_time": "2:55:37"} +{"current_steps": 9620, "total_steps": 35625, "loss": 0.5225, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.155487465032319e-05, "epoch": 1.3501754385964913, "percentage": 27.0, "elapsed_time": "1:04:57", "remaining_time": "2:55:35"} +{"current_steps": 9630, "total_steps": 35625, "loss": 0.4985, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.153833896685193e-05, "epoch": 1.351578947368421, "percentage": 27.03, "elapsed_time": "1:05:01", "remaining_time": "2:55:30"} +{"current_steps": 9640, "total_steps": 35625, "loss": 0.5386, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.1521790407716936e-05, "epoch": 1.352982456140351, "percentage": 27.06, "elapsed_time": "1:05:04", "remaining_time": "2:55:25"} +{"current_steps": 9650, "total_steps": 35625, "loss": 0.5283, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.150522898580183e-05, "epoch": 1.3543859649122807, "percentage": 27.09, "elapsed_time": "1:05:09", "remaining_time": "2:55:22"} +{"current_steps": 9660, "total_steps": 35625, "loss": 0.5684, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.148865471400024e-05, "epoch": 1.3557894736842107, "percentage": 27.12, "elapsed_time": "1:05:12", "remaining_time": "2:55:17"} +{"current_steps": 9670, "total_steps": 35625, "loss": 0.525, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.147206760521582e-05, "epoch": 1.3571929824561404, "percentage": 27.14, "elapsed_time": "1:05:16", "remaining_time": "2:55:13"} +{"current_steps": 9680, "total_steps": 35625, "loss": 0.5258, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.145546767236219e-05, "epoch": 1.3585964912280701, "percentage": 27.17, "elapsed_time": "1:05:20", "remaining_time": "2:55:08"} +{"current_steps": 9690, "total_steps": 35625, "loss": 0.5159, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.143885492836297e-05, "epoch": 1.3599999999999999, "percentage": 27.2, "elapsed_time": "1:05:24", "remaining_time": "2:55:04"} +{"current_steps": 9700, "total_steps": 35625, "loss": 0.5656, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.1422229386151754e-05, "epoch": 1.3614035087719298, "percentage": 27.23, "elapsed_time": "1:05:29", "remaining_time": "2:55:02"} +{"current_steps": 9710, "total_steps": 35625, "loss": 0.488, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.140559105867209e-05, "epoch": 1.3628070175438596, "percentage": 27.26, "elapsed_time": "1:05:33", "remaining_time": "2:54:57"} +{"current_steps": 9720, "total_steps": 35625, "loss": 0.457, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.1388939958877495e-05, "epoch": 1.3642105263157895, "percentage": 27.28, "elapsed_time": "1:05:37", "remaining_time": "2:54:53"} +{"current_steps": 9730, "total_steps": 35625, "loss": 0.459, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.137227609973141e-05, "epoch": 1.3656140350877193, "percentage": 27.31, "elapsed_time": "1:05:40", "remaining_time": "2:54:47"} +{"current_steps": 9740, "total_steps": 35625, "loss": 0.4794, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.135559949420723e-05, "epoch": 1.367017543859649, "percentage": 27.34, "elapsed_time": "1:05:44", "remaining_time": "2:54:41"} +{"current_steps": 9750, "total_steps": 35625, "loss": 0.5903, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.133891015528826e-05, "epoch": 1.368421052631579, "percentage": 27.37, "elapsed_time": "1:05:47", "remaining_time": "2:54:35"} +{"current_steps": 9760, "total_steps": 35625, "loss": 0.5521, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.132220809596772e-05, "epoch": 1.3698245614035087, "percentage": 27.4, "elapsed_time": "1:05:51", "remaining_time": "2:54:30"} +{"current_steps": 9770, "total_steps": 35625, "loss": 0.4667, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.1305493329248734e-05, "epoch": 1.3712280701754387, "percentage": 27.42, "elapsed_time": "1:05:57", "remaining_time": "2:54:32"} +{"current_steps": 9780, "total_steps": 35625, "loss": 0.4307, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.128876586814433e-05, "epoch": 1.3726315789473684, "percentage": 27.45, "elapsed_time": "1:06:01", "remaining_time": "2:54:27"} +{"current_steps": 9790, "total_steps": 35625, "loss": 0.5016, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.127202572567741e-05, "epoch": 1.3740350877192982, "percentage": 27.48, "elapsed_time": "1:06:04", "remaining_time": "2:54:22"} +{"current_steps": 9800, "total_steps": 35625, "loss": 0.5489, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.1255272914880735e-05, "epoch": 1.3754385964912281, "percentage": 27.51, "elapsed_time": "1:06:07", "remaining_time": "2:54:15"} +{"current_steps": 9810, "total_steps": 35625, "loss": 0.488, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.1238507448796945e-05, "epoch": 1.3768421052631579, "percentage": 27.54, "elapsed_time": "1:06:10", "remaining_time": "2:54:08"} +{"current_steps": 9820, "total_steps": 35625, "loss": 0.5739, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.122172934047855e-05, "epoch": 1.3782456140350878, "percentage": 27.56, "elapsed_time": "1:06:13", "remaining_time": "2:54:01"} +{"current_steps": 9830, "total_steps": 35625, "loss": 0.5036, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.120493860298786e-05, "epoch": 1.3796491228070176, "percentage": 27.59, "elapsed_time": "1:06:17", "remaining_time": "2:53:57"} +{"current_steps": 9840, "total_steps": 35625, "loss": 0.4737, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.1188135249397056e-05, "epoch": 1.3810526315789473, "percentage": 27.62, "elapsed_time": "1:06:20", "remaining_time": "2:53:50"} +{"current_steps": 9850, "total_steps": 35625, "loss": 0.5341, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.117131929278811e-05, "epoch": 1.3824561403508773, "percentage": 27.65, "elapsed_time": "1:06:24", "remaining_time": "2:53:45"} +{"current_steps": 9860, "total_steps": 35625, "loss": 0.4567, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.1154490746252825e-05, "epoch": 1.383859649122807, "percentage": 27.68, "elapsed_time": "1:06:27", "remaining_time": "2:53:39"} +{"current_steps": 9870, "total_steps": 35625, "loss": 0.5586, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.113764962289281e-05, "epoch": 1.385263157894737, "percentage": 27.71, "elapsed_time": "1:06:30", "remaining_time": "2:53:33"} +{"current_steps": 9880, "total_steps": 35625, "loss": 0.5065, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.112079593581944e-05, "epoch": 1.3866666666666667, "percentage": 27.73, "elapsed_time": "1:06:35", "remaining_time": "2:53:30"} +{"current_steps": 9890, "total_steps": 35625, "loss": 0.5335, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.110392969815391e-05, "epoch": 1.3880701754385965, "percentage": 27.76, "elapsed_time": "1:06:38", "remaining_time": "2:53:24"} +{"current_steps": 9900, "total_steps": 35625, "loss": 0.5445, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.108705092302715e-05, "epoch": 1.3894736842105262, "percentage": 27.79, "elapsed_time": "1:06:42", "remaining_time": "2:53:21"} +{"current_steps": 9910, "total_steps": 35625, "loss": 0.5146, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.1070159623579855e-05, "epoch": 1.3908771929824562, "percentage": 27.82, "elapsed_time": "1:06:48", "remaining_time": "2:53:21"} +{"current_steps": 9920, "total_steps": 35625, "loss": 0.4938, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.105325581296251e-05, "epoch": 1.392280701754386, "percentage": 27.85, "elapsed_time": "1:06:51", "remaining_time": "2:53:14"} +{"current_steps": 9930, "total_steps": 35625, "loss": 0.5353, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.103633950433528e-05, "epoch": 1.3936842105263159, "percentage": 27.87, "elapsed_time": "1:06:55", "remaining_time": "2:53:11"} +{"current_steps": 9940, "total_steps": 35625, "loss": 0.4869, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.1019410710868115e-05, "epoch": 1.3950877192982456, "percentage": 27.9, "elapsed_time": "1:06:59", "remaining_time": "2:53:05"} +{"current_steps": 9950, "total_steps": 35625, "loss": 0.4858, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.100246944574064e-05, "epoch": 1.3964912280701753, "percentage": 27.93, "elapsed_time": "1:07:04", "remaining_time": "2:53:04"} +{"current_steps": 9960, "total_steps": 35625, "loss": 0.5173, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.098551572214223e-05, "epoch": 1.3978947368421053, "percentage": 27.96, "elapsed_time": "1:07:09", "remaining_time": "2:53:02"} +{"current_steps": 9970, "total_steps": 35625, "loss": 0.5862, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.0968549553271926e-05, "epoch": 1.399298245614035, "percentage": 27.99, "elapsed_time": "1:07:12", "remaining_time": "2:52:55"} +{"current_steps": 9980, "total_steps": 35625, "loss": 0.5312, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.095157095233848e-05, "epoch": 1.400701754385965, "percentage": 28.01, "elapsed_time": "1:07:16", "remaining_time": "2:52:51"} +{"current_steps": 9990, "total_steps": 35625, "loss": 0.4668, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.093457993256031e-05, "epoch": 1.4021052631578947, "percentage": 28.04, "elapsed_time": "1:07:20", "remaining_time": "2:52:48"} +{"current_steps": 10000, "total_steps": 35625, "loss": 0.5192, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.0917576507165514e-05, "epoch": 1.4035087719298245, "percentage": 28.07, "elapsed_time": "1:07:24", "remaining_time": "2:52:43"} +{"current_steps": 10000, "total_steps": 35625, "loss": null, "eval_loss": 0.6501449942588806, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": null, "epoch": 1.4035087719298245, "percentage": 28.07, "elapsed_time": "1:07:24", "remaining_time": "2:52:43"} +{"current_steps": 10010, "total_steps": 35625, "loss": 0.6369, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.090056068939183e-05, "epoch": 1.4049122807017544, "percentage": 28.1, "elapsed_time": "1:08:12", "remaining_time": "2:54:31"} +{"current_steps": 10020, "total_steps": 35625, "loss": 0.4765, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.088353249248667e-05, "epoch": 1.4063157894736842, "percentage": 28.13, "elapsed_time": "1:08:15", "remaining_time": "2:54:26"} +{"current_steps": 10030, "total_steps": 35625, "loss": 0.4858, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.0866491929707064e-05, "epoch": 1.4077192982456141, "percentage": 28.15, "elapsed_time": "1:08:20", "remaining_time": "2:54:22"} +{"current_steps": 10040, "total_steps": 35625, "loss": 0.4502, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.084943901431966e-05, "epoch": 1.4091228070175439, "percentage": 28.18, "elapsed_time": "1:08:23", "remaining_time": "2:54:16"} +{"current_steps": 10050, "total_steps": 35625, "loss": 0.5036, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.083237375960075e-05, "epoch": 1.4105263157894736, "percentage": 28.21, "elapsed_time": "1:08:27", "remaining_time": "2:54:12"} +{"current_steps": 10060, "total_steps": 35625, "loss": 0.5185, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.081529617883622e-05, "epoch": 1.4119298245614036, "percentage": 28.24, "elapsed_time": "1:08:32", "remaining_time": "2:54:09"} +{"current_steps": 10070, "total_steps": 35625, "loss": 0.4701, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.079820628532155e-05, "epoch": 1.4133333333333333, "percentage": 28.27, "elapsed_time": "1:08:35", "remaining_time": "2:54:04"} +{"current_steps": 10080, "total_steps": 35625, "loss": 0.5406, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.0781104092361813e-05, "epoch": 1.4147368421052633, "percentage": 28.29, "elapsed_time": "1:08:38", "remaining_time": "2:53:57"} +{"current_steps": 10090, "total_steps": 35625, "loss": 0.4963, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.0763989613271635e-05, "epoch": 1.416140350877193, "percentage": 28.32, "elapsed_time": "1:08:42", "remaining_time": "2:53:53"} +{"current_steps": 10100, "total_steps": 35625, "loss": 0.5931, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.0746862861375245e-05, "epoch": 1.4175438596491228, "percentage": 28.35, "elapsed_time": "1:08:46", "remaining_time": "2:53:48"} +{"current_steps": 10110, "total_steps": 35625, "loss": 0.4908, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.07297238500064e-05, "epoch": 1.4189473684210525, "percentage": 28.38, "elapsed_time": "1:08:50", "remaining_time": "2:53:44"} +{"current_steps": 10120, "total_steps": 35625, "loss": 0.5732, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.0712572592508394e-05, "epoch": 1.4203508771929825, "percentage": 28.41, "elapsed_time": "1:08:54", "remaining_time": "2:53:38"} +{"current_steps": 10130, "total_steps": 35625, "loss": 0.5323, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.069540910223409e-05, "epoch": 1.4217543859649122, "percentage": 28.44, "elapsed_time": "1:08:57", "remaining_time": "2:53:34"} +{"current_steps": 10140, "total_steps": 35625, "loss": 0.5727, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.067823339254584e-05, "epoch": 1.4231578947368422, "percentage": 28.46, "elapsed_time": "1:09:02", "remaining_time": "2:53:31"} +{"current_steps": 10150, "total_steps": 35625, "loss": 0.5295, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.066104547681553e-05, "epoch": 1.424561403508772, "percentage": 28.49, "elapsed_time": "1:09:05", "remaining_time": "2:53:25"} +{"current_steps": 10160, "total_steps": 35625, "loss": 0.554, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.0643845368424545e-05, "epoch": 1.4259649122807017, "percentage": 28.52, "elapsed_time": "1:09:10", "remaining_time": "2:53:24"} +{"current_steps": 10170, "total_steps": 35625, "loss": 0.5138, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.062663308076374e-05, "epoch": 1.4273684210526316, "percentage": 28.55, "elapsed_time": "1:09:14", "remaining_time": "2:53:17"} +{"current_steps": 10180, "total_steps": 35625, "loss": 0.543, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.0609408627233494e-05, "epoch": 1.4287719298245614, "percentage": 28.58, "elapsed_time": "1:09:18", "remaining_time": "2:53:15"} +{"current_steps": 10190, "total_steps": 35625, "loss": 0.5094, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.059217202124361e-05, "epoch": 1.4301754385964913, "percentage": 28.6, "elapsed_time": "1:09:21", "remaining_time": "2:53:08"} +{"current_steps": 10200, "total_steps": 35625, "loss": 0.5468, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.0574923276213405e-05, "epoch": 1.431578947368421, "percentage": 28.63, "elapsed_time": "1:09:26", "remaining_time": "2:53:04"} +{"current_steps": 10210, "total_steps": 35625, "loss": 0.6082, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.0557662405571595e-05, "epoch": 1.4329824561403508, "percentage": 28.66, "elapsed_time": "1:09:29", "remaining_time": "2:52:58"} +{"current_steps": 10220, "total_steps": 35625, "loss": 0.5164, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.054038942275637e-05, "epoch": 1.4343859649122808, "percentage": 28.69, "elapsed_time": "1:09:32", "remaining_time": "2:52:51"} +{"current_steps": 10230, "total_steps": 35625, "loss": 0.5451, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.052310434121533e-05, "epoch": 1.4357894736842105, "percentage": 28.72, "elapsed_time": "1:09:36", "remaining_time": "2:52:47"} +{"current_steps": 10240, "total_steps": 35625, "loss": 0.5821, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.050580717440552e-05, "epoch": 1.4371929824561405, "percentage": 28.74, "elapsed_time": "1:09:39", "remaining_time": "2:52:41"} +{"current_steps": 10250, "total_steps": 35625, "loss": 0.5088, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.048849793579337e-05, "epoch": 1.4385964912280702, "percentage": 28.77, "elapsed_time": "1:09:43", "remaining_time": "2:52:37"} +{"current_steps": 10260, "total_steps": 35625, "loss": 0.5441, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.04711766388547e-05, "epoch": 1.44, "percentage": 28.8, "elapsed_time": "1:09:47", "remaining_time": "2:52:31"} +{"current_steps": 10270, "total_steps": 35625, "loss": 0.494, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.0453843297074756e-05, "epoch": 1.4414035087719297, "percentage": 28.83, "elapsed_time": "1:09:51", "remaining_time": "2:52:28"} +{"current_steps": 10280, "total_steps": 35625, "loss": 0.571, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.043649792394812e-05, "epoch": 1.4428070175438596, "percentage": 28.86, "elapsed_time": "1:09:55", "remaining_time": "2:52:22"} +{"current_steps": 10290, "total_steps": 35625, "loss": 0.5845, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.041914053297878e-05, "epoch": 1.4442105263157896, "percentage": 28.88, "elapsed_time": "1:09:58", "remaining_time": "2:52:17"} +{"current_steps": 10300, "total_steps": 35625, "loss": 0.4655, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.0401771137680046e-05, "epoch": 1.4456140350877194, "percentage": 28.91, "elapsed_time": "1:10:01", "remaining_time": "2:52:11"} +{"current_steps": 10310, "total_steps": 35625, "loss": 0.4939, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.038438975157458e-05, "epoch": 1.447017543859649, "percentage": 28.94, "elapsed_time": "1:10:05", "remaining_time": "2:52:05"} +{"current_steps": 10320, "total_steps": 35625, "loss": 0.6172, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.036699638819441e-05, "epoch": 1.4484210526315788, "percentage": 28.97, "elapsed_time": "1:10:09", "remaining_time": "2:52:02"} +{"current_steps": 10330, "total_steps": 35625, "loss": 0.4888, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.0349591061080846e-05, "epoch": 1.4498245614035088, "percentage": 29.0, "elapsed_time": "1:10:17", "remaining_time": "2:52:07"} +{"current_steps": 10340, "total_steps": 35625, "loss": 0.4427, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.0332173783784536e-05, "epoch": 1.4512280701754385, "percentage": 29.02, "elapsed_time": "1:10:26", "remaining_time": "2:52:14"} +{"current_steps": 10350, "total_steps": 35625, "loss": 0.4867, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.031474456986543e-05, "epoch": 1.4526315789473685, "percentage": 29.05, "elapsed_time": "1:10:36", "remaining_time": "2:52:26"} +{"current_steps": 10360, "total_steps": 35625, "loss": 0.4401, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.0297303432892775e-05, "epoch": 1.4540350877192982, "percentage": 29.08, "elapsed_time": "1:10:44", "remaining_time": "2:52:31"} +{"current_steps": 10370, "total_steps": 35625, "loss": 0.546, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.027985038644507e-05, "epoch": 1.455438596491228, "percentage": 29.11, "elapsed_time": "1:10:52", "remaining_time": "2:52:35"} +{"current_steps": 10380, "total_steps": 35625, "loss": 0.5211, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.026238544411014e-05, "epoch": 1.456842105263158, "percentage": 29.14, "elapsed_time": "1:11:00", "remaining_time": "2:52:40"} +{"current_steps": 10390, "total_steps": 35625, "loss": 0.4633, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.024490861948503e-05, "epoch": 1.4582456140350877, "percentage": 29.16, "elapsed_time": "1:11:11", "remaining_time": "2:52:53"} +{"current_steps": 10400, "total_steps": 35625, "loss": 0.5898, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.022741992617603e-05, "epoch": 1.4596491228070176, "percentage": 29.19, "elapsed_time": "1:11:18", "remaining_time": "2:52:58"} +{"current_steps": 10410, "total_steps": 35625, "loss": 0.4944, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.020991937779872e-05, "epoch": 1.4610526315789474, "percentage": 29.22, "elapsed_time": "1:11:30", "remaining_time": "2:53:11"} +{"current_steps": 10420, "total_steps": 35625, "loss": 0.55, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.019240698797785e-05, "epoch": 1.4624561403508771, "percentage": 29.25, "elapsed_time": "1:11:41", "remaining_time": "2:53:23"} +{"current_steps": 10430, "total_steps": 35625, "loss": 0.5103, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.017488277034742e-05, "epoch": 1.463859649122807, "percentage": 29.28, "elapsed_time": "1:11:50", "remaining_time": "2:53:31"} +{"current_steps": 10440, "total_steps": 35625, "loss": 0.5073, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.015734673855065e-05, "epoch": 1.4652631578947368, "percentage": 29.31, "elapsed_time": "1:11:59", "remaining_time": "2:53:39"} +{"current_steps": 10450, "total_steps": 35625, "loss": 0.5588, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.013979890623992e-05, "epoch": 1.4666666666666668, "percentage": 29.33, "elapsed_time": "1:12:07", "remaining_time": "2:53:44"} +{"current_steps": 10460, "total_steps": 35625, "loss": 0.5984, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.0122239287076834e-05, "epoch": 1.4680701754385965, "percentage": 29.36, "elapsed_time": "1:12:16", "remaining_time": "2:53:52"} +{"current_steps": 10470, "total_steps": 35625, "loss": 0.5437, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.010466789473215e-05, "epoch": 1.4694736842105263, "percentage": 29.39, "elapsed_time": "1:12:24", "remaining_time": "2:53:57"} +{"current_steps": 10480, "total_steps": 35625, "loss": 0.4573, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.008708474288581e-05, "epoch": 1.470877192982456, "percentage": 29.42, "elapsed_time": "1:12:34", "remaining_time": "2:54:08"} +{"current_steps": 10490, "total_steps": 35625, "loss": 0.5319, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.006948984522687e-05, "epoch": 1.472280701754386, "percentage": 29.45, "elapsed_time": "1:12:42", "remaining_time": "2:54:11"} +{"current_steps": 10500, "total_steps": 35625, "loss": 0.4559, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.00518832154536e-05, "epoch": 1.4736842105263157, "percentage": 29.47, "elapsed_time": "1:12:55", "remaining_time": "2:54:28"} +{"current_steps": 10510, "total_steps": 35625, "loss": 0.4776, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.003426486727335e-05, "epoch": 1.4750877192982457, "percentage": 29.5, "elapsed_time": "1:13:04", "remaining_time": "2:54:37"} +{"current_steps": 10520, "total_steps": 35625, "loss": 0.5672, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.00166348144026e-05, "epoch": 1.4764912280701754, "percentage": 29.53, "elapsed_time": "1:13:12", "remaining_time": "2:54:42"} +{"current_steps": 10530, "total_steps": 35625, "loss": 0.5926, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 3.9998993070566954e-05, "epoch": 1.4778947368421052, "percentage": 29.56, "elapsed_time": "1:13:20", "remaining_time": "2:54:47"} +{"current_steps": 10540, "total_steps": 35625, "loss": 0.4975, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 3.998133964950112e-05, "epoch": 1.4792982456140351, "percentage": 29.59, "elapsed_time": "1:13:28", "remaining_time": "2:54:51"} +{"current_steps": 10550, "total_steps": 35625, "loss": 0.5229, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 3.9963674564948886e-05, "epoch": 1.4807017543859649, "percentage": 29.61, "elapsed_time": "1:13:38", "remaining_time": "2:55:02"} +{"current_steps": 10560, "total_steps": 35625, "loss": 0.4995, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 3.9945997830663126e-05, "epoch": 1.4821052631578948, "percentage": 29.64, "elapsed_time": "1:13:46", "remaining_time": "2:55:05"} +{"current_steps": 10570, "total_steps": 35625, "loss": 0.5464, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 3.992830946040579e-05, "epoch": 1.4835087719298246, "percentage": 29.67, "elapsed_time": "1:13:54", "remaining_time": "2:55:12"} +{"current_steps": 10580, "total_steps": 35625, "loss": 0.5691, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 3.9910609467947866e-05, "epoch": 1.4849122807017543, "percentage": 29.7, "elapsed_time": "1:14:02", "remaining_time": "2:55:16"} +{"current_steps": 10590, "total_steps": 35625, "loss": 0.5982, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 3.989289786706942e-05, "epoch": 1.4863157894736843, "percentage": 29.73, "elapsed_time": "1:14:10", "remaining_time": "2:55:20"} +{"current_steps": 10600, "total_steps": 35625, "loss": 0.5173, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 3.987517467155954e-05, "epoch": 1.487719298245614, "percentage": 29.75, "elapsed_time": "1:14:18", "remaining_time": "2:55:25"} +{"current_steps": 10610, "total_steps": 35625, "loss": 0.4406, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 3.985743989521633e-05, "epoch": 1.489122807017544, "percentage": 29.78, "elapsed_time": "1:14:26", "remaining_time": "2:55:30"} +{"current_steps": 10620, "total_steps": 35625, "loss": 0.5417, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 3.9839693551846924e-05, "epoch": 1.4905263157894737, "percentage": 29.81, "elapsed_time": "1:14:34", "remaining_time": "2:55:34"} +{"current_steps": 10630, "total_steps": 35625, "loss": 0.6226, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 3.982193565526747e-05, "epoch": 1.4919298245614034, "percentage": 29.84, "elapsed_time": "1:14:43", "remaining_time": "2:55:41"} +{"current_steps": 10640, "total_steps": 35625, "loss": 0.5337, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 3.9804166219303086e-05, "epoch": 1.4933333333333334, "percentage": 29.87, "elapsed_time": "1:14:53", "remaining_time": "2:55:52"} +{"current_steps": 10650, "total_steps": 35625, "loss": 0.5027, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 3.9786385257787886e-05, "epoch": 1.4947368421052631, "percentage": 29.89, "elapsed_time": "1:15:03", "remaining_time": "2:56:02"} +{"current_steps": 10660, "total_steps": 35625, "loss": 0.5676, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 3.9768592784564974e-05, "epoch": 1.496140350877193, "percentage": 29.92, "elapsed_time": "1:15:10", "remaining_time": "2:56:03"} +{"current_steps": 10670, "total_steps": 35625, "loss": 0.4542, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 3.975078881348638e-05, "epoch": 1.4975438596491228, "percentage": 29.95, "elapsed_time": "1:15:21", "remaining_time": "2:56:14"} +{"current_steps": 10680, "total_steps": 35625, "loss": 0.5563, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 3.9732973358413115e-05, "epoch": 1.4989473684210526, "percentage": 29.98, "elapsed_time": "1:15:33", "remaining_time": "2:56:29"} +{"current_steps": 10690, "total_steps": 35625, "loss": 0.4128, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 3.971514643321513e-05, "epoch": 1.5003508771929823, "percentage": 30.01, "elapsed_time": "1:15:41", "remaining_time": "2:56:33"} +{"current_steps": 10700, "total_steps": 35625, "loss": 0.4909, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 3.969730805177129e-05, "epoch": 1.5017543859649123, "percentage": 30.04, "elapsed_time": "1:15:51", "remaining_time": "2:56:42"} +{"current_steps": 10710, "total_steps": 35625, "loss": 0.5664, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 3.967945822796938e-05, "epoch": 1.5031578947368422, "percentage": 30.06, "elapsed_time": "1:16:02", "remaining_time": "2:56:53"} +{"current_steps": 10720, "total_steps": 35625, "loss": 0.5827, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 3.9661596975706104e-05, "epoch": 1.504561403508772, "percentage": 30.09, "elapsed_time": "1:16:12", "remaining_time": "2:57:02"} +{"current_steps": 10730, "total_steps": 35625, "loss": 0.5105, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 3.9643724308887065e-05, "epoch": 1.5059649122807017, "percentage": 30.12, "elapsed_time": "1:16:20", "remaining_time": "2:57:08"} +{"current_steps": 10740, "total_steps": 35625, "loss": 0.5455, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 3.962584024142675e-05, "epoch": 1.5073684210526315, "percentage": 30.15, "elapsed_time": "1:16:31", "remaining_time": "2:57:18"} +{"current_steps": 10750, "total_steps": 35625, "loss": 0.5419, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 3.96079447872485e-05, "epoch": 1.5087719298245614, "percentage": 30.18, "elapsed_time": "1:16:39", "remaining_time": "2:57:23"} +{"current_steps": 10760, "total_steps": 35625, "loss": 0.5861, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 3.9590037960284546e-05, "epoch": 1.5101754385964914, "percentage": 30.2, "elapsed_time": "1:16:49", "remaining_time": "2:57:31"} +{"current_steps": 10770, "total_steps": 35625, "loss": 0.5692, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 3.9572119774475975e-05, "epoch": 1.5115789473684211, "percentage": 30.23, "elapsed_time": "1:16:58", "remaining_time": "2:57:38"} +{"current_steps": 10780, "total_steps": 35625, "loss": 0.5345, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 3.95541902437727e-05, "epoch": 1.5129824561403509, "percentage": 30.26, "elapsed_time": "1:17:08", "remaining_time": "2:57:46"} +{"current_steps": 10790, "total_steps": 35625, "loss": 0.5212, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 3.953624938213348e-05, "epoch": 1.5143859649122806, "percentage": 30.29, "elapsed_time": "1:17:18", "remaining_time": "2:57:57"} +{"current_steps": 10800, "total_steps": 35625, "loss": 0.4838, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 3.95182972035259e-05, "epoch": 1.5157894736842106, "percentage": 30.32, "elapsed_time": "1:17:29", "remaining_time": "2:58:06"} +{"current_steps": 10810, "total_steps": 35625, "loss": 0.5011, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 3.950033372192633e-05, "epoch": 1.5171929824561403, "percentage": 30.34, "elapsed_time": "1:17:36", "remaining_time": "2:58:10"} +{"current_steps": 10820, "total_steps": 35625, "loss": 0.5043, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 3.948235895131997e-05, "epoch": 1.5185964912280703, "percentage": 30.37, "elapsed_time": "1:17:46", "remaining_time": "2:58:17"} +{"current_steps": 10830, "total_steps": 35625, "loss": 0.5062, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 3.946437290570078e-05, "epoch": 1.52, "percentage": 30.4, "elapsed_time": "1:17:57", "remaining_time": "2:58:28"} +{"current_steps": 10840, "total_steps": 35625, "loss": 0.6164, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 3.944637559907152e-05, "epoch": 1.5214035087719298, "percentage": 30.43, "elapsed_time": "1:18:04", "remaining_time": "2:58:30"} +{"current_steps": 10850, "total_steps": 35625, "loss": 0.5159, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 3.9428367045443704e-05, "epoch": 1.5228070175438595, "percentage": 30.46, "elapsed_time": "1:18:14", "remaining_time": "2:58:38"} +{"current_steps": 10860, "total_steps": 35625, "loss": 0.6505, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 3.941034725883762e-05, "epoch": 1.5242105263157895, "percentage": 30.48, "elapsed_time": "1:18:21", "remaining_time": "2:58:41"} +{"current_steps": 10870, "total_steps": 35625, "loss": 0.4808, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 3.939231625328229e-05, "epoch": 1.5256140350877194, "percentage": 30.51, "elapsed_time": "1:18:34", "remaining_time": "2:58:56"} +{"current_steps": 10880, "total_steps": 35625, "loss": 0.5194, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 3.9374274042815465e-05, "epoch": 1.5270175438596492, "percentage": 30.54, "elapsed_time": "1:18:41", "remaining_time": "2:58:59"} +{"current_steps": 10890, "total_steps": 35625, "loss": 0.5079, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 3.935622064148361e-05, "epoch": 1.528421052631579, "percentage": 30.57, "elapsed_time": "1:18:51", "remaining_time": "2:59:07"} +{"current_steps": 10900, "total_steps": 35625, "loss": 0.4808, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 3.9338156063341946e-05, "epoch": 1.5298245614035086, "percentage": 30.6, "elapsed_time": "1:19:00", "remaining_time": "2:59:12"} +{"current_steps": 10910, "total_steps": 35625, "loss": 0.429, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 3.932008032245434e-05, "epoch": 1.5312280701754386, "percentage": 30.62, "elapsed_time": "1:19:07", "remaining_time": "2:59:14"} +{"current_steps": 10920, "total_steps": 35625, "loss": 0.489, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 3.930199343289339e-05, "epoch": 1.5326315789473686, "percentage": 30.65, "elapsed_time": "1:19:17", "remaining_time": "2:59:23"} +{"current_steps": 10930, "total_steps": 35625, "loss": 0.4881, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 3.9283895408740355e-05, "epoch": 1.5340350877192983, "percentage": 30.68, "elapsed_time": "1:19:26", "remaining_time": "2:59:30"} +{"current_steps": 10940, "total_steps": 35625, "loss": 0.5913, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 3.926578626408517e-05, "epoch": 1.535438596491228, "percentage": 30.71, "elapsed_time": "1:19:34", "remaining_time": "2:59:33"} +{"current_steps": 10950, "total_steps": 35625, "loss": 0.4719, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 3.924766601302642e-05, "epoch": 1.5368421052631578, "percentage": 30.74, "elapsed_time": "1:19:44", "remaining_time": "2:59:40"} +{"current_steps": 10960, "total_steps": 35625, "loss": 0.5445, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 3.9229534669671344e-05, "epoch": 1.5382456140350877, "percentage": 30.76, "elapsed_time": "1:19:53", "remaining_time": "2:59:46"} +{"current_steps": 10970, "total_steps": 35625, "loss": 0.4989, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 3.9211392248135815e-05, "epoch": 1.5396491228070175, "percentage": 30.79, "elapsed_time": "1:20:01", "remaining_time": "2:59:50"} +{"current_steps": 10980, "total_steps": 35625, "loss": 0.5321, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 3.9193238762544325e-05, "epoch": 1.5410526315789475, "percentage": 30.82, "elapsed_time": "1:20:11", "remaining_time": "2:59:58"} +{"current_steps": 10990, "total_steps": 35625, "loss": 0.4765, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 3.9175074227029996e-05, "epoch": 1.5424561403508772, "percentage": 30.85, "elapsed_time": "1:20:20", "remaining_time": "3:00:05"} +{"current_steps": 11000, "total_steps": 35625, "loss": 0.5748, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 3.915689865573454e-05, "epoch": 1.543859649122807, "percentage": 30.88, "elapsed_time": "1:20:28", "remaining_time": "3:00:10"} +{"current_steps": 11010, "total_steps": 35625, "loss": 0.7091, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 3.913871206280824e-05, "epoch": 1.545263157894737, "percentage": 30.91, "elapsed_time": "1:20:39", "remaining_time": "3:00:18"} +{"current_steps": 11020, "total_steps": 35625, "loss": 0.507, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 3.912051446241001e-05, "epoch": 1.5466666666666666, "percentage": 30.93, "elapsed_time": "1:20:47", "remaining_time": "3:00:23"} +{"current_steps": 11030, "total_steps": 35625, "loss": 0.5738, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 3.910230586870729e-05, "epoch": 1.5480701754385966, "percentage": 30.96, "elapsed_time": "1:20:55", "remaining_time": "3:00:27"} +{"current_steps": 11040, "total_steps": 35625, "loss": 0.4437, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 3.90840862958761e-05, "epoch": 1.5494736842105263, "percentage": 30.99, "elapsed_time": "1:21:03", "remaining_time": "3:00:31"} +{"current_steps": 11050, "total_steps": 35625, "loss": 0.4859, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 3.9065855758101e-05, "epoch": 1.550877192982456, "percentage": 31.02, "elapsed_time": "1:21:16", "remaining_time": "3:00:46"} +{"current_steps": 11060, "total_steps": 35625, "loss": 0.5433, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 3.904761426957509e-05, "epoch": 1.5522807017543858, "percentage": 31.05, "elapsed_time": "1:21:27", "remaining_time": "3:00:56"} +{"current_steps": 11070, "total_steps": 35625, "loss": 0.5938, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 3.902936184449999e-05, "epoch": 1.5536842105263158, "percentage": 31.07, "elapsed_time": "1:21:36", "remaining_time": "3:01:00"} +{"current_steps": 11080, "total_steps": 35625, "loss": 0.4484, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 3.901109849708585e-05, "epoch": 1.5550877192982457, "percentage": 31.1, "elapsed_time": "1:21:46", "remaining_time": "3:01:08"} +{"current_steps": 11090, "total_steps": 35625, "loss": 0.4353, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 3.8992824241551295e-05, "epoch": 1.5564912280701755, "percentage": 31.13, "elapsed_time": "1:21:55", "remaining_time": "3:01:13"} +{"current_steps": 11100, "total_steps": 35625, "loss": 0.4497, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 3.897453909212348e-05, "epoch": 1.5578947368421052, "percentage": 31.16, "elapsed_time": "1:22:03", "remaining_time": "3:01:17"} +{"current_steps": 11110, "total_steps": 35625, "loss": 0.4648, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 3.895624306303799e-05, "epoch": 1.559298245614035, "percentage": 31.19, "elapsed_time": "1:22:10", "remaining_time": "3:01:20"} +{"current_steps": 11120, "total_steps": 35625, "loss": 0.5921, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 3.893793616853894e-05, "epoch": 1.560701754385965, "percentage": 31.21, "elapsed_time": "1:22:18", "remaining_time": "3:01:22"} +{"current_steps": 11130, "total_steps": 35625, "loss": 0.4611, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 3.891961842287886e-05, "epoch": 1.5621052631578949, "percentage": 31.24, "elapsed_time": "1:22:25", "remaining_time": "3:01:25"} +{"current_steps": 11140, "total_steps": 35625, "loss": 0.4745, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 3.890128984031876e-05, "epoch": 1.5635087719298246, "percentage": 31.27, "elapsed_time": "1:22:35", "remaining_time": "3:01:32"} +{"current_steps": 11150, "total_steps": 35625, "loss": 0.5716, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 3.888295043512804e-05, "epoch": 1.5649122807017544, "percentage": 31.3, "elapsed_time": "1:22:48", "remaining_time": "3:01:45"} +{"current_steps": 11160, "total_steps": 35625, "loss": 0.5193, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 3.886460022158458e-05, "epoch": 1.566315789473684, "percentage": 31.33, "elapsed_time": "1:22:55", "remaining_time": "3:01:47"} +{"current_steps": 11170, "total_steps": 35625, "loss": 0.4974, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 3.884623921397463e-05, "epoch": 1.567719298245614, "percentage": 31.35, "elapsed_time": "1:23:02", "remaining_time": "3:01:49"} +{"current_steps": 11180, "total_steps": 35625, "loss": 0.4418, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 3.882786742659289e-05, "epoch": 1.5691228070175438, "percentage": 31.38, "elapsed_time": "1:23:12", "remaining_time": "3:01:55"} +{"current_steps": 11190, "total_steps": 35625, "loss": 0.5278, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 3.880948487374241e-05, "epoch": 1.5705263157894738, "percentage": 31.41, "elapsed_time": "1:23:21", "remaining_time": "3:02:02"} +{"current_steps": 11200, "total_steps": 35625, "loss": 0.476, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 3.8791091569734625e-05, "epoch": 1.5719298245614035, "percentage": 31.44, "elapsed_time": "1:23:31", "remaining_time": "3:02:08"} +{"current_steps": 11210, "total_steps": 35625, "loss": 0.581, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 3.8772687528889385e-05, "epoch": 1.5733333333333333, "percentage": 31.47, "elapsed_time": "1:23:41", "remaining_time": "3:02:15"} +{"current_steps": 11220, "total_steps": 35625, "loss": 0.5076, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 3.875427276553485e-05, "epoch": 1.5747368421052632, "percentage": 31.49, "elapsed_time": "1:23:49", "remaining_time": "3:02:20"} +{"current_steps": 11230, "total_steps": 35625, "loss": 0.5177, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 3.873584729400753e-05, "epoch": 1.576140350877193, "percentage": 31.52, "elapsed_time": "1:23:58", "remaining_time": "3:02:24"} +{"current_steps": 11240, "total_steps": 35625, "loss": 0.5348, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 3.8717411128652304e-05, "epoch": 1.577543859649123, "percentage": 31.55, "elapsed_time": "1:24:07", "remaining_time": "3:02:30"} +{"current_steps": 11250, "total_steps": 35625, "loss": 0.4699, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 3.869896428382236e-05, "epoch": 1.5789473684210527, "percentage": 31.58, "elapsed_time": "1:24:16", "remaining_time": "3:02:35"} +{"current_steps": 11260, "total_steps": 35625, "loss": 0.5403, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 3.8680506773879184e-05, "epoch": 1.5803508771929824, "percentage": 31.61, "elapsed_time": "1:24:25", "remaining_time": "3:02:40"} +{"current_steps": 11270, "total_steps": 35625, "loss": 0.4982, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 3.8662038613192596e-05, "epoch": 1.5817543859649121, "percentage": 31.64, "elapsed_time": "1:24:34", "remaining_time": "3:02:45"} +{"current_steps": 11280, "total_steps": 35625, "loss": 0.5025, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 3.8643559816140685e-05, "epoch": 1.583157894736842, "percentage": 31.66, "elapsed_time": "1:24:43", "remaining_time": "3:02:51"} +{"current_steps": 11290, "total_steps": 35625, "loss": 0.4716, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 3.862507039710982e-05, "epoch": 1.584561403508772, "percentage": 31.69, "elapsed_time": "1:24:51", "remaining_time": "3:02:54"} +{"current_steps": 11300, "total_steps": 35625, "loss": 0.5378, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 3.860657037049466e-05, "epoch": 1.5859649122807018, "percentage": 31.72, "elapsed_time": "1:25:00", "remaining_time": "3:02:59"} +{"current_steps": 11310, "total_steps": 35625, "loss": 0.5017, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 3.85880597506981e-05, "epoch": 1.5873684210526315, "percentage": 31.75, "elapsed_time": "1:25:08", "remaining_time": "3:03:02"} +{"current_steps": 11320, "total_steps": 35625, "loss": 0.4612, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 3.856953855213131e-05, "epoch": 1.5887719298245613, "percentage": 31.78, "elapsed_time": "1:25:15", "remaining_time": "3:03:04"} +{"current_steps": 11330, "total_steps": 35625, "loss": 0.5077, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 3.855100678921365e-05, "epoch": 1.5901754385964912, "percentage": 31.8, "elapsed_time": "1:25:26", "remaining_time": "3:03:13"} +{"current_steps": 11340, "total_steps": 35625, "loss": 0.5643, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 3.8532464476372765e-05, "epoch": 1.5915789473684212, "percentage": 31.83, "elapsed_time": "1:25:35", "remaining_time": "3:03:17"} +{"current_steps": 11350, "total_steps": 35625, "loss": 0.4939, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 3.851391162804445e-05, "epoch": 1.592982456140351, "percentage": 31.86, "elapsed_time": "1:25:44", "remaining_time": "3:03:23"} +{"current_steps": 11360, "total_steps": 35625, "loss": 0.5191, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 3.849534825867275e-05, "epoch": 1.5943859649122807, "percentage": 31.89, "elapsed_time": "1:25:54", "remaining_time": "3:03:29"} +{"current_steps": 11370, "total_steps": 35625, "loss": 0.5361, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 3.847677438270988e-05, "epoch": 1.5957894736842104, "percentage": 31.92, "elapsed_time": "1:26:02", "remaining_time": "3:03:32"} +{"current_steps": 11380, "total_steps": 35625, "loss": 0.5005, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 3.845819001461625e-05, "epoch": 1.5971929824561404, "percentage": 31.94, "elapsed_time": "1:26:11", "remaining_time": "3:03:37"} +{"current_steps": 11390, "total_steps": 35625, "loss": 0.491, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 3.8439595168860406e-05, "epoch": 1.5985964912280701, "percentage": 31.97, "elapsed_time": "1:26:20", "remaining_time": "3:03:43"} +{"current_steps": 11400, "total_steps": 35625, "loss": 0.5636, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 3.842098985991909e-05, "epoch": 1.6, "percentage": 32.0, "elapsed_time": "1:26:28", "remaining_time": "3:03:45"} +{"current_steps": 11410, "total_steps": 35625, "loss": 0.5482, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 3.840237410227717e-05, "epoch": 1.6014035087719298, "percentage": 32.03, "elapsed_time": "1:26:36", "remaining_time": "3:03:47"} +{"current_steps": 11420, "total_steps": 35625, "loss": 0.4854, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 3.838374791042764e-05, "epoch": 1.6028070175438596, "percentage": 32.06, "elapsed_time": "1:26:43", "remaining_time": "3:03:49"} +{"current_steps": 11430, "total_steps": 35625, "loss": 0.5535, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 3.8365111298871645e-05, "epoch": 1.6042105263157893, "percentage": 32.08, "elapsed_time": "1:26:52", "remaining_time": "3:03:54"} +{"current_steps": 11440, "total_steps": 35625, "loss": 0.5493, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 3.834646428211841e-05, "epoch": 1.6056140350877193, "percentage": 32.11, "elapsed_time": "1:27:01", "remaining_time": "3:03:58"} +{"current_steps": 11450, "total_steps": 35625, "loss": 0.5126, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 3.83278068746853e-05, "epoch": 1.6070175438596492, "percentage": 32.14, "elapsed_time": "1:27:10", "remaining_time": "3:04:03"} +{"current_steps": 11460, "total_steps": 35625, "loss": 0.5692, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 3.830913909109772e-05, "epoch": 1.608421052631579, "percentage": 32.17, "elapsed_time": "1:27:22", "remaining_time": "3:04:14"} +{"current_steps": 11470, "total_steps": 35625, "loss": 0.4367, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 3.8290460945889186e-05, "epoch": 1.6098245614035087, "percentage": 32.2, "elapsed_time": "1:27:33", "remaining_time": "3:04:22"} +{"current_steps": 11480, "total_steps": 35625, "loss": 0.5275, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 3.827177245360129e-05, "epoch": 1.6112280701754385, "percentage": 32.22, "elapsed_time": "1:27:44", "remaining_time": "3:04:33"} +{"current_steps": 11490, "total_steps": 35625, "loss": 0.5663, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 3.825307362878364e-05, "epoch": 1.6126315789473684, "percentage": 32.25, "elapsed_time": "1:27:53", "remaining_time": "3:04:37"} +{"current_steps": 11500, "total_steps": 35625, "loss": 0.4986, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 3.823436448599393e-05, "epoch": 1.6140350877192984, "percentage": 32.28, "elapsed_time": "1:28:03", "remaining_time": "3:04:44"} +{"current_steps": 11510, "total_steps": 35625, "loss": 0.5401, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 3.8215645039797874e-05, "epoch": 1.6154385964912281, "percentage": 32.31, "elapsed_time": "1:28:10", "remaining_time": "3:04:45"} +{"current_steps": 11520, "total_steps": 35625, "loss": 0.534, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 3.8196915304769184e-05, "epoch": 1.6168421052631579, "percentage": 32.34, "elapsed_time": "1:28:22", "remaining_time": "3:04:55"} +{"current_steps": 11530, "total_steps": 35625, "loss": 0.457, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 3.817817529548962e-05, "epoch": 1.6182456140350876, "percentage": 32.36, "elapsed_time": "1:28:31", "remaining_time": "3:04:59"} +{"current_steps": 11540, "total_steps": 35625, "loss": 0.5023, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 3.815942502654889e-05, "epoch": 1.6196491228070176, "percentage": 32.39, "elapsed_time": "1:28:38", "remaining_time": "3:05:00"} +{"current_steps": 11550, "total_steps": 35625, "loss": 0.4885, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 3.8140664512544746e-05, "epoch": 1.6210526315789475, "percentage": 32.42, "elapsed_time": "1:28:47", "remaining_time": "3:05:04"} +{"current_steps": 11560, "total_steps": 35625, "loss": 0.5204, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 3.8121893768082896e-05, "epoch": 1.6224561403508773, "percentage": 32.45, "elapsed_time": "1:28:55", "remaining_time": "3:05:07"} +{"current_steps": 11570, "total_steps": 35625, "loss": 0.4611, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 3.8103112807776986e-05, "epoch": 1.623859649122807, "percentage": 32.48, "elapsed_time": "1:29:05", "remaining_time": "3:05:14"} +{"current_steps": 11580, "total_steps": 35625, "loss": 0.4999, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 3.8084321646248654e-05, "epoch": 1.6252631578947367, "percentage": 32.51, "elapsed_time": "1:29:13", "remaining_time": "3:05:16"} +{"current_steps": 11590, "total_steps": 35625, "loss": 0.5241, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 3.806552029812747e-05, "epoch": 1.6266666666666667, "percentage": 32.53, "elapsed_time": "1:29:27", "remaining_time": "3:05:31"} +{"current_steps": 11600, "total_steps": 35625, "loss": 0.5275, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 3.804670877805091e-05, "epoch": 1.6280701754385964, "percentage": 32.56, "elapsed_time": "1:29:35", "remaining_time": "3:05:33"} +{"current_steps": 11610, "total_steps": 35625, "loss": 0.4517, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 3.802788710066439e-05, "epoch": 1.6294736842105264, "percentage": 32.59, "elapsed_time": "1:29:42", "remaining_time": "3:05:33"} +{"current_steps": 11620, "total_steps": 35625, "loss": 0.4437, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 3.800905528062123e-05, "epoch": 1.6308771929824561, "percentage": 32.62, "elapsed_time": "1:29:52", "remaining_time": "3:05:40"} +{"current_steps": 11630, "total_steps": 35625, "loss": 0.5334, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 3.7990213332582665e-05, "epoch": 1.6322807017543859, "percentage": 32.65, "elapsed_time": "1:29:59", "remaining_time": "3:05:39"} +{"current_steps": 11640, "total_steps": 35625, "loss": 0.5915, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 3.7971361271217775e-05, "epoch": 1.6336842105263156, "percentage": 32.67, "elapsed_time": "1:30:08", "remaining_time": "3:05:44"} +{"current_steps": 11650, "total_steps": 35625, "loss": 0.633, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 3.7952499111203544e-05, "epoch": 1.6350877192982456, "percentage": 32.7, "elapsed_time": "1:30:18", "remaining_time": "3:05:51"} +{"current_steps": 11660, "total_steps": 35625, "loss": 0.523, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 3.793362686722483e-05, "epoch": 1.6364912280701756, "percentage": 32.73, "elapsed_time": "1:30:27", "remaining_time": "3:05:54"} +{"current_steps": 11670, "total_steps": 35625, "loss": 0.5025, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 3.7914744553974284e-05, "epoch": 1.6378947368421053, "percentage": 32.76, "elapsed_time": "1:30:38", "remaining_time": "3:06:02"} +{"current_steps": 11680, "total_steps": 35625, "loss": 0.5153, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 3.789585218615246e-05, "epoch": 1.639298245614035, "percentage": 32.79, "elapsed_time": "1:30:47", "remaining_time": "3:06:08"} +{"current_steps": 11690, "total_steps": 35625, "loss": 0.5783, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 3.787694977846771e-05, "epoch": 1.6407017543859648, "percentage": 32.81, "elapsed_time": "1:31:00", "remaining_time": "3:06:19"} +{"current_steps": 11700, "total_steps": 35625, "loss": 0.5333, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 3.78580373456362e-05, "epoch": 1.6421052631578947, "percentage": 32.84, "elapsed_time": "1:31:08", "remaining_time": "3:06:21"} +{"current_steps": 11710, "total_steps": 35625, "loss": 0.574, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 3.783911490238191e-05, "epoch": 1.6435087719298247, "percentage": 32.87, "elapsed_time": "1:31:14", "remaining_time": "3:06:21"} +{"current_steps": 11720, "total_steps": 35625, "loss": 0.5028, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 3.782018246343661e-05, "epoch": 1.6449122807017544, "percentage": 32.9, "elapsed_time": "1:31:23", "remaining_time": "3:06:24"} +{"current_steps": 11730, "total_steps": 35625, "loss": 0.5425, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 3.780124004353987e-05, "epoch": 1.6463157894736842, "percentage": 32.93, "elapsed_time": "1:31:32", "remaining_time": "3:06:28"} +{"current_steps": 11740, "total_steps": 35625, "loss": 0.4961, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 3.778228765743898e-05, "epoch": 1.647719298245614, "percentage": 32.95, "elapsed_time": "1:31:40", "remaining_time": "3:06:30"} +{"current_steps": 11750, "total_steps": 35625, "loss": 0.5135, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 3.776332531988903e-05, "epoch": 1.6491228070175439, "percentage": 32.98, "elapsed_time": "1:31:51", "remaining_time": "3:06:39"} +{"current_steps": 11760, "total_steps": 35625, "loss": 0.5917, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 3.774435304565288e-05, "epoch": 1.6505263157894738, "percentage": 33.01, "elapsed_time": "1:32:00", "remaining_time": "3:06:43"} +{"current_steps": 11770, "total_steps": 35625, "loss": 0.6529, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 3.772537084950106e-05, "epoch": 1.6519298245614036, "percentage": 33.04, "elapsed_time": "1:32:11", "remaining_time": "3:06:51"} +{"current_steps": 11780, "total_steps": 35625, "loss": 0.4853, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 3.770637874621189e-05, "epoch": 1.6533333333333333, "percentage": 33.07, "elapsed_time": "1:32:19", "remaining_time": "3:06:53"} +{"current_steps": 11790, "total_steps": 35625, "loss": 0.5509, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 3.7687376750571347e-05, "epoch": 1.654736842105263, "percentage": 33.09, "elapsed_time": "1:32:29", "remaining_time": "3:06:58"} +{"current_steps": 11800, "total_steps": 35625, "loss": 0.5083, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 3.7668364877373154e-05, "epoch": 1.656140350877193, "percentage": 33.12, "elapsed_time": "1:32:36", "remaining_time": "3:06:59"} +{"current_steps": 11810, "total_steps": 35625, "loss": 0.5239, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 3.764934314141869e-05, "epoch": 1.6575438596491228, "percentage": 33.15, "elapsed_time": "1:32:44", "remaining_time": "3:07:01"} +{"current_steps": 11820, "total_steps": 35625, "loss": 0.5295, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 3.763031155751705e-05, "epoch": 1.6589473684210527, "percentage": 33.18, "elapsed_time": "1:32:53", "remaining_time": "3:07:05"} +{"current_steps": 11830, "total_steps": 35625, "loss": 0.3987, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 3.7611270140484956e-05, "epoch": 1.6603508771929825, "percentage": 33.21, "elapsed_time": "1:33:04", "remaining_time": "3:07:11"} +{"current_steps": 11840, "total_steps": 35625, "loss": 0.5236, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 3.759221890514681e-05, "epoch": 1.6617543859649122, "percentage": 33.24, "elapsed_time": "1:33:13", "remaining_time": "3:07:17"} +{"current_steps": 11850, "total_steps": 35625, "loss": 0.4783, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 3.757315786633465e-05, "epoch": 1.663157894736842, "percentage": 33.26, "elapsed_time": "1:33:24", "remaining_time": "3:07:23"} +{"current_steps": 11860, "total_steps": 35625, "loss": 0.6304, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 3.7554087038888155e-05, "epoch": 1.664561403508772, "percentage": 33.29, "elapsed_time": "1:33:31", "remaining_time": "3:07:24"} +{"current_steps": 11870, "total_steps": 35625, "loss": 0.4951, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 3.753500643765461e-05, "epoch": 1.6659649122807019, "percentage": 33.32, "elapsed_time": "1:33:40", "remaining_time": "3:07:27"} +{"current_steps": 11880, "total_steps": 35625, "loss": 0.5195, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 3.751591607748891e-05, "epoch": 1.6673684210526316, "percentage": 33.35, "elapsed_time": "1:33:49", "remaining_time": "3:07:32"} +{"current_steps": 11890, "total_steps": 35625, "loss": 0.6116, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 3.749681597325357e-05, "epoch": 1.6687719298245614, "percentage": 33.38, "elapsed_time": "1:33:58", "remaining_time": "3:07:34"} +{"current_steps": 11900, "total_steps": 35625, "loss": 0.5038, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 3.7477706139818683e-05, "epoch": 1.670175438596491, "percentage": 33.4, "elapsed_time": "1:34:06", "remaining_time": "3:07:37"} +{"current_steps": 11910, "total_steps": 35625, "loss": 0.5671, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 3.745858659206188e-05, "epoch": 1.671578947368421, "percentage": 33.43, "elapsed_time": "1:34:15", "remaining_time": "3:07:41"} +{"current_steps": 11920, "total_steps": 35625, "loss": 0.5559, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 3.743945734486841e-05, "epoch": 1.672982456140351, "percentage": 33.46, "elapsed_time": "1:34:23", "remaining_time": "3:07:42"} +{"current_steps": 11930, "total_steps": 35625, "loss": 0.5069, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 3.742031841313103e-05, "epoch": 1.6743859649122808, "percentage": 33.49, "elapsed_time": "1:34:33", "remaining_time": "3:07:48"} +{"current_steps": 11940, "total_steps": 35625, "loss": 0.5431, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 3.7401169811750066e-05, "epoch": 1.6757894736842105, "percentage": 33.52, "elapsed_time": "1:34:40", "remaining_time": "3:07:48"} +{"current_steps": 11950, "total_steps": 35625, "loss": 0.5636, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 3.7382011555633365e-05, "epoch": 1.6771929824561402, "percentage": 33.54, "elapsed_time": "1:34:47", "remaining_time": "3:07:48"} +{"current_steps": 11960, "total_steps": 35625, "loss": 0.4871, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 3.736284365969627e-05, "epoch": 1.6785964912280702, "percentage": 33.57, "elapsed_time": "1:34:56", "remaining_time": "3:07:50"} +{"current_steps": 11970, "total_steps": 35625, "loss": 0.6245, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 3.7343666138861646e-05, "epoch": 1.6800000000000002, "percentage": 33.6, "elapsed_time": "1:35:07", "remaining_time": "3:07:58"} +{"current_steps": 11980, "total_steps": 35625, "loss": 0.5126, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 3.7324479008059865e-05, "epoch": 1.68140350877193, "percentage": 33.63, "elapsed_time": "1:35:15", "remaining_time": "3:08:00"} +{"current_steps": 11990, "total_steps": 35625, "loss": 0.5669, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 3.7305282282228756e-05, "epoch": 1.6828070175438596, "percentage": 33.66, "elapsed_time": "1:35:25", "remaining_time": "3:08:05"} +{"current_steps": 12000, "total_steps": 35625, "loss": 0.4796, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 3.728607597631363e-05, "epoch": 1.6842105263157894, "percentage": 33.68, "elapsed_time": "1:35:35", "remaining_time": "3:08:11"} +{"current_steps": 12000, "total_steps": 35625, "loss": null, "eval_loss": 0.6500447392463684, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": null, "epoch": 1.6842105263157894, "percentage": 33.68, "elapsed_time": "1:35:35", "remaining_time": "3:08:11"} +{"current_steps": 12000, "total_steps": 35625, "loss": null, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": null, "epoch": 1.6842105263157894, "percentage": 33.68, "elapsed_time": "1:35:35", "remaining_time": "3:08:11"} +{"current_steps": 375, "total_steps": 375, "loss": null, "eval_loss": 0.6415141820907593, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": null, "epoch": 1.6842105263157894, "percentage": 100.0, "elapsed_time": "1:39:35", "remaining_time": "0:00:00"} diff --git a/llama2_13b_peft/news_commentary_it/trainer_state.json b/llama2_13b_peft/news_commentary_it/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..f695c95aa1f39734102bdf3ab2894d07b0f04078 --- /dev/null +++ b/llama2_13b_peft/news_commentary_it/trainer_state.json @@ -0,0 +1,8478 @@ +{ + "best_metric": 0.6415141820907593, + "best_model_checkpoint": "ckpt/llama2_13b_fuze27_no_sys/news_commentary_it_no_sys/checkpoint-6000", + "epoch": 1.6842105263157894, + "eval_steps": 2000, + "global_step": 12000, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0014035087719298245, + "grad_norm": 0.6813449859619141, + "learning_rate": 2.5e-05, + "loss": 1.3423, + "step": 10 + }, + { + "epoch": 0.002807017543859649, + "grad_norm": 0.9470943212509155, + "learning_rate": 5e-05, + "loss": 1.3855, + "step": 20 + }, + { + "epoch": 0.004210526315789474, + "grad_norm": 0.8929744958877563, + "learning_rate": 4.999999026832157e-05, + "loss": 0.9621, + "step": 30 + }, + { + "epoch": 0.005614035087719298, + "grad_norm": 1.383805274963379, + "learning_rate": 4.9999961073293845e-05, + "loss": 0.8217, + "step": 40 + }, + { + "epoch": 0.007017543859649123, + "grad_norm": 0.7758613228797913, + "learning_rate": 4.9999912414939555e-05, + "loss": 0.7743, + "step": 50 + }, + { + "epoch": 0.008421052631578947, + "grad_norm": 0.38530462980270386, + "learning_rate": 4.9999844293296585e-05, + "loss": 0.7671, + "step": 60 + }, + { + "epoch": 0.009824561403508772, + "grad_norm": 0.9287435412406921, + "learning_rate": 4.999975670841798e-05, + "loss": 0.7657, + "step": 70 + }, + { + "epoch": 0.011228070175438596, + "grad_norm": 0.5709918737411499, + "learning_rate": 4.9999649660371906e-05, + "loss": 0.6544, + "step": 80 + }, + { + "epoch": 0.01263157894736842, + "grad_norm": 0.6181680560112, + "learning_rate": 4.9999523149241714e-05, + "loss": 0.7627, + "step": 90 + }, + { + "epoch": 0.014035087719298246, + "grad_norm": 0.8074678182601929, + "learning_rate": 4.99993771751259e-05, + "loss": 0.7428, + "step": 100 + }, + { + "epoch": 0.015438596491228071, + "grad_norm": 0.7091221809387207, + "learning_rate": 4.999921173813812e-05, + "loss": 0.7024, + "step": 110 + }, + { + "epoch": 0.016842105263157894, + "grad_norm": 2.1647095680236816, + "learning_rate": 4.999902683840715e-05, + "loss": 0.8205, + "step": 120 + }, + { + "epoch": 0.018245614035087718, + "grad_norm": 1.178070068359375, + "learning_rate": 4.9998822476076955e-05, + "loss": 0.7359, + "step": 130 + }, + { + "epoch": 0.019649122807017545, + "grad_norm": 1.0926941633224487, + "learning_rate": 4.999859865130664e-05, + "loss": 0.6837, + "step": 140 + }, + { + "epoch": 0.021052631578947368, + "grad_norm": 1.5175189971923828, + "learning_rate": 4.9998355364270445e-05, + "loss": 0.7091, + "step": 150 + }, + { + "epoch": 0.02245614035087719, + "grad_norm": 0.9353613257408142, + "learning_rate": 4.999809261515779e-05, + "loss": 0.7608, + "step": 160 + }, + { + "epoch": 0.023859649122807018, + "grad_norm": 0.4437258839607239, + "learning_rate": 4.9997810404173234e-05, + "loss": 0.7725, + "step": 170 + }, + { + "epoch": 0.02526315789473684, + "grad_norm": 0.4320019781589508, + "learning_rate": 4.999750873153648e-05, + "loss": 0.7884, + "step": 180 + }, + { + "epoch": 0.02666666666666667, + "grad_norm": 0.8100196123123169, + "learning_rate": 4.9997187597482405e-05, + "loss": 0.7266, + "step": 190 + }, + { + "epoch": 0.028070175438596492, + "grad_norm": 1.1367573738098145, + "learning_rate": 4.9996847002261006e-05, + "loss": 0.6825, + "step": 200 + }, + { + "epoch": 0.029473684210526315, + "grad_norm": 0.9733144640922546, + "learning_rate": 4.999648694613746e-05, + "loss": 0.6162, + "step": 210 + }, + { + "epoch": 0.030877192982456142, + "grad_norm": 0.7170027494430542, + "learning_rate": 4.9996107429392083e-05, + "loss": 0.6696, + "step": 220 + }, + { + "epoch": 0.032280701754385965, + "grad_norm": 0.939182698726654, + "learning_rate": 4.9995708452320325e-05, + "loss": 0.7512, + "step": 230 + }, + { + "epoch": 0.03368421052631579, + "grad_norm": 0.7647657990455627, + "learning_rate": 4.999529001523282e-05, + "loss": 0.7137, + "step": 240 + }, + { + "epoch": 0.03508771929824561, + "grad_norm": 0.9428808093070984, + "learning_rate": 4.9994852118455335e-05, + "loss": 0.7676, + "step": 250 + }, + { + "epoch": 0.036491228070175435, + "grad_norm": 0.3808974325656891, + "learning_rate": 4.9994394762328786e-05, + "loss": 0.7208, + "step": 260 + }, + { + "epoch": 0.037894736842105266, + "grad_norm": 1.0278472900390625, + "learning_rate": 4.999391794720923e-05, + "loss": 0.7029, + "step": 270 + }, + { + "epoch": 0.03929824561403509, + "grad_norm": 0.8878808617591858, + "learning_rate": 4.9993421673467906e-05, + "loss": 0.6751, + "step": 280 + }, + { + "epoch": 0.04070175438596491, + "grad_norm": 0.5619615316390991, + "learning_rate": 4.9992905941491155e-05, + "loss": 0.7652, + "step": 290 + }, + { + "epoch": 0.042105263157894736, + "grad_norm": 1.1087744235992432, + "learning_rate": 4.9992370751680514e-05, + "loss": 0.7609, + "step": 300 + }, + { + "epoch": 0.04350877192982456, + "grad_norm": 0.7816822528839111, + "learning_rate": 4.999181610445263e-05, + "loss": 0.678, + "step": 310 + }, + { + "epoch": 0.04491228070175438, + "grad_norm": 1.0437147617340088, + "learning_rate": 4.9991242000239316e-05, + "loss": 0.7089, + "step": 320 + }, + { + "epoch": 0.04631578947368421, + "grad_norm": 0.7266655564308167, + "learning_rate": 4.9990648439487544e-05, + "loss": 0.7034, + "step": 330 + }, + { + "epoch": 0.047719298245614036, + "grad_norm": 0.8695891499519348, + "learning_rate": 4.999003542265941e-05, + "loss": 0.6789, + "step": 340 + }, + { + "epoch": 0.04912280701754386, + "grad_norm": 1.2530779838562012, + "learning_rate": 4.998940295023218e-05, + "loss": 0.6895, + "step": 350 + }, + { + "epoch": 0.05052631578947368, + "grad_norm": 0.9562914371490479, + "learning_rate": 4.9988751022698244e-05, + "loss": 0.7472, + "step": 360 + }, + { + "epoch": 0.051929824561403506, + "grad_norm": 1.5020138025283813, + "learning_rate": 4.9988079640565155e-05, + "loss": 0.7637, + "step": 370 + }, + { + "epoch": 0.05333333333333334, + "grad_norm": 1.3555861711502075, + "learning_rate": 4.998738880435561e-05, + "loss": 0.8042, + "step": 380 + }, + { + "epoch": 0.05473684210526316, + "grad_norm": 1.4689439535140991, + "learning_rate": 4.9986678514607434e-05, + "loss": 0.7878, + "step": 390 + }, + { + "epoch": 0.056140350877192984, + "grad_norm": 1.1399718523025513, + "learning_rate": 4.998594877187362e-05, + "loss": 0.6831, + "step": 400 + }, + { + "epoch": 0.05754385964912281, + "grad_norm": 0.9988260269165039, + "learning_rate": 4.998519957672232e-05, + "loss": 0.7905, + "step": 410 + }, + { + "epoch": 0.05894736842105263, + "grad_norm": 1.3424835205078125, + "learning_rate": 4.998443092973678e-05, + "loss": 0.6195, + "step": 420 + }, + { + "epoch": 0.060350877192982454, + "grad_norm": 1.3029276132583618, + "learning_rate": 4.998364283151542e-05, + "loss": 0.7603, + "step": 430 + }, + { + "epoch": 0.061754385964912284, + "grad_norm": 1.0647430419921875, + "learning_rate": 4.9982835282671816e-05, + "loss": 0.7099, + "step": 440 + }, + { + "epoch": 0.06315789473684211, + "grad_norm": 0.4545954763889313, + "learning_rate": 4.998200828383466e-05, + "loss": 0.6307, + "step": 450 + }, + { + "epoch": 0.06456140350877193, + "grad_norm": 0.9822194576263428, + "learning_rate": 4.99811618356478e-05, + "loss": 0.7084, + "step": 460 + }, + { + "epoch": 0.06596491228070175, + "grad_norm": 1.0566892623901367, + "learning_rate": 4.998029593877025e-05, + "loss": 0.6897, + "step": 470 + }, + { + "epoch": 0.06736842105263158, + "grad_norm": 0.9908930063247681, + "learning_rate": 4.9979410593876096e-05, + "loss": 0.7054, + "step": 480 + }, + { + "epoch": 0.0687719298245614, + "grad_norm": 0.5955024361610413, + "learning_rate": 4.997850580165464e-05, + "loss": 0.645, + "step": 490 + }, + { + "epoch": 0.07017543859649122, + "grad_norm": 1.570892095565796, + "learning_rate": 4.997758156281029e-05, + "loss": 0.6455, + "step": 500 + }, + { + "epoch": 0.07157894736842105, + "grad_norm": 0.9024527072906494, + "learning_rate": 4.997663787806259e-05, + "loss": 0.6797, + "step": 510 + }, + { + "epoch": 0.07298245614035087, + "grad_norm": 0.6280427575111389, + "learning_rate": 4.997567474814623e-05, + "loss": 0.7582, + "step": 520 + }, + { + "epoch": 0.07438596491228071, + "grad_norm": 1.0706899166107178, + "learning_rate": 4.997469217381105e-05, + "loss": 0.667, + "step": 530 + }, + { + "epoch": 0.07578947368421053, + "grad_norm": 0.8091099262237549, + "learning_rate": 4.997369015582201e-05, + "loss": 0.6878, + "step": 540 + }, + { + "epoch": 0.07719298245614035, + "grad_norm": 0.900131106376648, + "learning_rate": 4.9972668694959216e-05, + "loss": 0.7693, + "step": 550 + }, + { + "epoch": 0.07859649122807018, + "grad_norm": 0.791890025138855, + "learning_rate": 4.9971627792017915e-05, + "loss": 0.561, + "step": 560 + }, + { + "epoch": 0.08, + "grad_norm": 1.3132946491241455, + "learning_rate": 4.997056744780848e-05, + "loss": 0.6739, + "step": 570 + }, + { + "epoch": 0.08140350877192983, + "grad_norm": 0.8182291984558105, + "learning_rate": 4.9969487663156434e-05, + "loss": 0.6561, + "step": 580 + }, + { + "epoch": 0.08280701754385965, + "grad_norm": 1.1820317506790161, + "learning_rate": 4.9968388438902415e-05, + "loss": 0.6056, + "step": 590 + }, + { + "epoch": 0.08421052631578947, + "grad_norm": 0.6508825421333313, + "learning_rate": 4.9967269775902204e-05, + "loss": 0.6962, + "step": 600 + }, + { + "epoch": 0.0856140350877193, + "grad_norm": 0.8885963559150696, + "learning_rate": 4.996613167502674e-05, + "loss": 0.6174, + "step": 610 + }, + { + "epoch": 0.08701754385964912, + "grad_norm": 0.8769521713256836, + "learning_rate": 4.996497413716205e-05, + "loss": 0.6806, + "step": 620 + }, + { + "epoch": 0.08842105263157894, + "grad_norm": 1.1168580055236816, + "learning_rate": 4.996379716320933e-05, + "loss": 0.7618, + "step": 630 + }, + { + "epoch": 0.08982456140350877, + "grad_norm": 0.6629518270492554, + "learning_rate": 4.996260075408489e-05, + "loss": 0.6796, + "step": 640 + }, + { + "epoch": 0.0912280701754386, + "grad_norm": 0.5513269901275635, + "learning_rate": 4.996138491072018e-05, + "loss": 0.6249, + "step": 650 + }, + { + "epoch": 0.09263157894736843, + "grad_norm": 0.8878002166748047, + "learning_rate": 4.996014963406177e-05, + "loss": 0.6905, + "step": 660 + }, + { + "epoch": 0.09403508771929825, + "grad_norm": 1.407973289489746, + "learning_rate": 4.9958894925071364e-05, + "loss": 0.7082, + "step": 670 + }, + { + "epoch": 0.09543859649122807, + "grad_norm": 2.0107500553131104, + "learning_rate": 4.995762078472581e-05, + "loss": 0.6751, + "step": 680 + }, + { + "epoch": 0.0968421052631579, + "grad_norm": 0.7563285827636719, + "learning_rate": 4.995632721401705e-05, + "loss": 0.6223, + "step": 690 + }, + { + "epoch": 0.09824561403508772, + "grad_norm": 0.7729387879371643, + "learning_rate": 4.995501421395219e-05, + "loss": 0.622, + "step": 700 + }, + { + "epoch": 0.09964912280701754, + "grad_norm": 0.9992890954017639, + "learning_rate": 4.995368178555343e-05, + "loss": 0.7565, + "step": 710 + }, + { + "epoch": 0.10105263157894737, + "grad_norm": 1.0641027688980103, + "learning_rate": 4.9952329929858125e-05, + "loss": 0.7486, + "step": 720 + }, + { + "epoch": 0.10245614035087719, + "grad_norm": 0.8268628716468811, + "learning_rate": 4.995095864791873e-05, + "loss": 0.6825, + "step": 730 + }, + { + "epoch": 0.10385964912280701, + "grad_norm": 0.7123477458953857, + "learning_rate": 4.994956794080285e-05, + "loss": 0.7342, + "step": 740 + }, + { + "epoch": 0.10526315789473684, + "grad_norm": 1.0346596240997314, + "learning_rate": 4.994815780959318e-05, + "loss": 0.6289, + "step": 750 + }, + { + "epoch": 0.10666666666666667, + "grad_norm": 0.7578685283660889, + "learning_rate": 4.994672825538757e-05, + "loss": 0.5675, + "step": 760 + }, + { + "epoch": 0.1080701754385965, + "grad_norm": 1.1263622045516968, + "learning_rate": 4.994527927929897e-05, + "loss": 0.7527, + "step": 770 + }, + { + "epoch": 0.10947368421052632, + "grad_norm": 0.8590745329856873, + "learning_rate": 4.9943810882455454e-05, + "loss": 0.6421, + "step": 780 + }, + { + "epoch": 0.11087719298245614, + "grad_norm": 0.7870830297470093, + "learning_rate": 4.994232306600023e-05, + "loss": 0.7016, + "step": 790 + }, + { + "epoch": 0.11228070175438597, + "grad_norm": 0.9499567747116089, + "learning_rate": 4.99408158310916e-05, + "loss": 0.6911, + "step": 800 + }, + { + "epoch": 0.11368421052631579, + "grad_norm": 1.1604363918304443, + "learning_rate": 4.9939289178903016e-05, + "loss": 0.697, + "step": 810 + }, + { + "epoch": 0.11508771929824561, + "grad_norm": 0.7308230400085449, + "learning_rate": 4.993774311062301e-05, + "loss": 0.5691, + "step": 820 + }, + { + "epoch": 0.11649122807017544, + "grad_norm": 1.0032395124435425, + "learning_rate": 4.993617762745526e-05, + "loss": 0.7744, + "step": 830 + }, + { + "epoch": 0.11789473684210526, + "grad_norm": 1.0617241859436035, + "learning_rate": 4.993459273061855e-05, + "loss": 0.7652, + "step": 840 + }, + { + "epoch": 0.11929824561403508, + "grad_norm": 1.207223653793335, + "learning_rate": 4.993298842134677e-05, + "loss": 0.6843, + "step": 850 + }, + { + "epoch": 0.12070175438596491, + "grad_norm": 0.6737737059593201, + "learning_rate": 4.993136470088894e-05, + "loss": 0.7147, + "step": 860 + }, + { + "epoch": 0.12210526315789473, + "grad_norm": 1.3904882669448853, + "learning_rate": 4.992972157050916e-05, + "loss": 0.641, + "step": 870 + }, + { + "epoch": 0.12350877192982457, + "grad_norm": 0.8821682929992676, + "learning_rate": 4.992805903148669e-05, + "loss": 0.6212, + "step": 880 + }, + { + "epoch": 0.12491228070175439, + "grad_norm": 1.214309811592102, + "learning_rate": 4.992637708511586e-05, + "loss": 0.6817, + "step": 890 + }, + { + "epoch": 0.12631578947368421, + "grad_norm": 1.2954894304275513, + "learning_rate": 4.9924675732706123e-05, + "loss": 0.7072, + "step": 900 + }, + { + "epoch": 0.12771929824561404, + "grad_norm": 0.8437069058418274, + "learning_rate": 4.992295497558204e-05, + "loss": 0.6221, + "step": 910 + }, + { + "epoch": 0.12912280701754386, + "grad_norm": 0.6401008367538452, + "learning_rate": 4.992121481508328e-05, + "loss": 0.6162, + "step": 920 + }, + { + "epoch": 0.13052631578947368, + "grad_norm": 1.1894147396087646, + "learning_rate": 4.9919455252564624e-05, + "loss": 0.7548, + "step": 930 + }, + { + "epoch": 0.1319298245614035, + "grad_norm": 0.9592342376708984, + "learning_rate": 4.991767628939594e-05, + "loss": 0.6377, + "step": 940 + }, + { + "epoch": 0.13333333333333333, + "grad_norm": 0.6419144868850708, + "learning_rate": 4.991587792696223e-05, + "loss": 0.6971, + "step": 950 + }, + { + "epoch": 0.13473684210526315, + "grad_norm": 1.0908111333847046, + "learning_rate": 4.991406016666356e-05, + "loss": 0.7929, + "step": 960 + }, + { + "epoch": 0.13614035087719298, + "grad_norm": 1.231597661972046, + "learning_rate": 4.9912223009915126e-05, + "loss": 0.7556, + "step": 970 + }, + { + "epoch": 0.1375438596491228, + "grad_norm": 0.7628648281097412, + "learning_rate": 4.991036645814722e-05, + "loss": 0.5883, + "step": 980 + }, + { + "epoch": 0.13894736842105262, + "grad_norm": 0.766953706741333, + "learning_rate": 4.9908490512805236e-05, + "loss": 0.6362, + "step": 990 + }, + { + "epoch": 0.14035087719298245, + "grad_norm": 1.070429801940918, + "learning_rate": 4.990659517534966e-05, + "loss": 0.7057, + "step": 1000 + }, + { + "epoch": 0.14175438596491227, + "grad_norm": 0.8499042391777039, + "learning_rate": 4.990468044725606e-05, + "loss": 0.6051, + "step": 1010 + }, + { + "epoch": 0.1431578947368421, + "grad_norm": 1.1411361694335938, + "learning_rate": 4.990274633001514e-05, + "loss": 0.7434, + "step": 1020 + }, + { + "epoch": 0.14456140350877192, + "grad_norm": 1.3025455474853516, + "learning_rate": 4.990079282513266e-05, + "loss": 0.6681, + "step": 1030 + }, + { + "epoch": 0.14596491228070174, + "grad_norm": 0.9307923316955566, + "learning_rate": 4.9898819934129506e-05, + "loss": 0.6655, + "step": 1040 + }, + { + "epoch": 0.14736842105263157, + "grad_norm": 0.6463920474052429, + "learning_rate": 4.989682765854163e-05, + "loss": 0.7529, + "step": 1050 + }, + { + "epoch": 0.14877192982456142, + "grad_norm": 1.216407060623169, + "learning_rate": 4.989481599992009e-05, + "loss": 0.6249, + "step": 1060 + }, + { + "epoch": 0.15017543859649124, + "grad_norm": 0.747074544429779, + "learning_rate": 4.989278495983103e-05, + "loss": 0.6437, + "step": 1070 + }, + { + "epoch": 0.15157894736842106, + "grad_norm": 0.8777433633804321, + "learning_rate": 4.989073453985569e-05, + "loss": 0.6206, + "step": 1080 + }, + { + "epoch": 0.1529824561403509, + "grad_norm": 0.8588824272155762, + "learning_rate": 4.988866474159037e-05, + "loss": 0.6141, + "step": 1090 + }, + { + "epoch": 0.1543859649122807, + "grad_norm": 0.6369594931602478, + "learning_rate": 4.988657556664652e-05, + "loss": 0.6653, + "step": 1100 + }, + { + "epoch": 0.15578947368421053, + "grad_norm": 0.7276690006256104, + "learning_rate": 4.98844670166506e-05, + "loss": 0.6503, + "step": 1110 + }, + { + "epoch": 0.15719298245614036, + "grad_norm": 0.6937339305877686, + "learning_rate": 4.98823390932442e-05, + "loss": 0.6298, + "step": 1120 + }, + { + "epoch": 0.15859649122807018, + "grad_norm": 1.4779495000839233, + "learning_rate": 4.988019179808398e-05, + "loss": 0.6889, + "step": 1130 + }, + { + "epoch": 0.16, + "grad_norm": 1.4205069541931152, + "learning_rate": 4.987802513284169e-05, + "loss": 0.7086, + "step": 1140 + }, + { + "epoch": 0.16140350877192983, + "grad_norm": 1.5097942352294922, + "learning_rate": 4.9875839099204134e-05, + "loss": 0.6727, + "step": 1150 + }, + { + "epoch": 0.16280701754385965, + "grad_norm": 0.8267427086830139, + "learning_rate": 4.987363369887324e-05, + "loss": 0.6993, + "step": 1160 + }, + { + "epoch": 0.16421052631578947, + "grad_norm": 1.1303791999816895, + "learning_rate": 4.987140893356597e-05, + "loss": 0.5671, + "step": 1170 + }, + { + "epoch": 0.1656140350877193, + "grad_norm": 0.9507080316543579, + "learning_rate": 4.986916480501438e-05, + "loss": 0.6929, + "step": 1180 + }, + { + "epoch": 0.16701754385964912, + "grad_norm": 1.0298510789871216, + "learning_rate": 4.986690131496561e-05, + "loss": 0.5368, + "step": 1190 + }, + { + "epoch": 0.16842105263157894, + "grad_norm": 1.0742335319519043, + "learning_rate": 4.986461846518186e-05, + "loss": 0.6473, + "step": 1200 + }, + { + "epoch": 0.16982456140350877, + "grad_norm": 1.3724429607391357, + "learning_rate": 4.986231625744041e-05, + "loss": 0.6698, + "step": 1210 + }, + { + "epoch": 0.1712280701754386, + "grad_norm": 0.7210483551025391, + "learning_rate": 4.985999469353359e-05, + "loss": 0.6747, + "step": 1220 + }, + { + "epoch": 0.1726315789473684, + "grad_norm": 0.8128493428230286, + "learning_rate": 4.9857653775268853e-05, + "loss": 0.6509, + "step": 1230 + }, + { + "epoch": 0.17403508771929824, + "grad_norm": 0.9664400815963745, + "learning_rate": 4.985529350446865e-05, + "loss": 0.6895, + "step": 1240 + }, + { + "epoch": 0.17543859649122806, + "grad_norm": 1.0563639402389526, + "learning_rate": 4.985291388297055e-05, + "loss": 0.5882, + "step": 1250 + }, + { + "epoch": 0.17684210526315788, + "grad_norm": 0.7978933453559875, + "learning_rate": 4.985051491262716e-05, + "loss": 0.6688, + "step": 1260 + }, + { + "epoch": 0.1782456140350877, + "grad_norm": 1.0037199258804321, + "learning_rate": 4.984809659530617e-05, + "loss": 0.6135, + "step": 1270 + }, + { + "epoch": 0.17964912280701753, + "grad_norm": 1.0351414680480957, + "learning_rate": 4.9845658932890315e-05, + "loss": 0.6849, + "step": 1280 + }, + { + "epoch": 0.18105263157894738, + "grad_norm": 0.9015732407569885, + "learning_rate": 4.9843201927277407e-05, + "loss": 0.6036, + "step": 1290 + }, + { + "epoch": 0.1824561403508772, + "grad_norm": 1.1445683240890503, + "learning_rate": 4.984072558038031e-05, + "loss": 0.7348, + "step": 1300 + }, + { + "epoch": 0.18385964912280703, + "grad_norm": 1.2019379138946533, + "learning_rate": 4.983822989412693e-05, + "loss": 0.7679, + "step": 1310 + }, + { + "epoch": 0.18526315789473685, + "grad_norm": 0.6560442447662354, + "learning_rate": 4.983571487046026e-05, + "loss": 0.7083, + "step": 1320 + }, + { + "epoch": 0.18666666666666668, + "grad_norm": 0.8415977954864502, + "learning_rate": 4.9833180511338314e-05, + "loss": 0.6417, + "step": 1330 + }, + { + "epoch": 0.1880701754385965, + "grad_norm": 0.8725243210792542, + "learning_rate": 4.983062681873421e-05, + "loss": 0.6817, + "step": 1340 + }, + { + "epoch": 0.18947368421052632, + "grad_norm": 0.8865370154380798, + "learning_rate": 4.982805379463605e-05, + "loss": 0.6554, + "step": 1350 + }, + { + "epoch": 0.19087719298245615, + "grad_norm": 0.6979865431785583, + "learning_rate": 4.982546144104704e-05, + "loss": 0.6613, + "step": 1360 + }, + { + "epoch": 0.19228070175438597, + "grad_norm": 0.8604574203491211, + "learning_rate": 4.982284975998541e-05, + "loss": 0.6902, + "step": 1370 + }, + { + "epoch": 0.1936842105263158, + "grad_norm": 0.849172055721283, + "learning_rate": 4.982021875348445e-05, + "loss": 0.81, + "step": 1380 + }, + { + "epoch": 0.19508771929824562, + "grad_norm": 0.9217461347579956, + "learning_rate": 4.9817568423592484e-05, + "loss": 0.6796, + "step": 1390 + }, + { + "epoch": 0.19649122807017544, + "grad_norm": 1.1720378398895264, + "learning_rate": 4.981489877237288e-05, + "loss": 0.6109, + "step": 1400 + }, + { + "epoch": 0.19789473684210526, + "grad_norm": 0.8361873626708984, + "learning_rate": 4.9812209801904064e-05, + "loss": 0.7521, + "step": 1410 + }, + { + "epoch": 0.19929824561403509, + "grad_norm": 0.9124870896339417, + "learning_rate": 4.980950151427948e-05, + "loss": 0.6742, + "step": 1420 + }, + { + "epoch": 0.2007017543859649, + "grad_norm": 1.0720082521438599, + "learning_rate": 4.980677391160763e-05, + "loss": 0.659, + "step": 1430 + }, + { + "epoch": 0.20210526315789473, + "grad_norm": 0.7144408822059631, + "learning_rate": 4.980402699601205e-05, + "loss": 0.6392, + "step": 1440 + }, + { + "epoch": 0.20350877192982456, + "grad_norm": 0.8546087145805359, + "learning_rate": 4.98012607696313e-05, + "loss": 0.6674, + "step": 1450 + }, + { + "epoch": 0.20491228070175438, + "grad_norm": 0.8717739582061768, + "learning_rate": 4.979847523461898e-05, + "loss": 0.6772, + "step": 1460 + }, + { + "epoch": 0.2063157894736842, + "grad_norm": 0.9035875201225281, + "learning_rate": 4.9795670393143735e-05, + "loss": 0.6598, + "step": 1470 + }, + { + "epoch": 0.20771929824561403, + "grad_norm": 1.5168395042419434, + "learning_rate": 4.9792846247389214e-05, + "loss": 0.6784, + "step": 1480 + }, + { + "epoch": 0.20912280701754385, + "grad_norm": 1.3440768718719482, + "learning_rate": 4.979000279955413e-05, + "loss": 0.673, + "step": 1490 + }, + { + "epoch": 0.21052631578947367, + "grad_norm": 0.6575384140014648, + "learning_rate": 4.97871400518522e-05, + "loss": 0.6018, + "step": 1500 + }, + { + "epoch": 0.2119298245614035, + "grad_norm": 0.843136727809906, + "learning_rate": 4.978425800651216e-05, + "loss": 0.673, + "step": 1510 + }, + { + "epoch": 0.21333333333333335, + "grad_norm": 0.9389488101005554, + "learning_rate": 4.978135666577779e-05, + "loss": 0.6455, + "step": 1520 + }, + { + "epoch": 0.21473684210526317, + "grad_norm": 1.0860190391540527, + "learning_rate": 4.977843603190788e-05, + "loss": 0.6945, + "step": 1530 + }, + { + "epoch": 0.216140350877193, + "grad_norm": 0.923224925994873, + "learning_rate": 4.9775496107176245e-05, + "loss": 0.6441, + "step": 1540 + }, + { + "epoch": 0.21754385964912282, + "grad_norm": 0.9440721273422241, + "learning_rate": 4.977253689387172e-05, + "loss": 0.6399, + "step": 1550 + }, + { + "epoch": 0.21894736842105264, + "grad_norm": 1.225602626800537, + "learning_rate": 4.976955839429815e-05, + "loss": 0.7059, + "step": 1560 + }, + { + "epoch": 0.22035087719298246, + "grad_norm": 0.7701632380485535, + "learning_rate": 4.976656061077441e-05, + "loss": 0.7422, + "step": 1570 + }, + { + "epoch": 0.2217543859649123, + "grad_norm": 1.0036752223968506, + "learning_rate": 4.976354354563435e-05, + "loss": 0.7, + "step": 1580 + }, + { + "epoch": 0.2231578947368421, + "grad_norm": 1.2595415115356445, + "learning_rate": 4.976050720122688e-05, + "loss": 0.693, + "step": 1590 + }, + { + "epoch": 0.22456140350877193, + "grad_norm": 0.9705458283424377, + "learning_rate": 4.97574515799159e-05, + "loss": 0.6477, + "step": 1600 + }, + { + "epoch": 0.22596491228070176, + "grad_norm": 0.9339498281478882, + "learning_rate": 4.975437668408031e-05, + "loss": 0.6839, + "step": 1610 + }, + { + "epoch": 0.22736842105263158, + "grad_norm": 2.5165653228759766, + "learning_rate": 4.9751282516114024e-05, + "loss": 0.5796, + "step": 1620 + }, + { + "epoch": 0.2287719298245614, + "grad_norm": 1.2094191312789917, + "learning_rate": 4.9748169078425955e-05, + "loss": 0.6967, + "step": 1630 + }, + { + "epoch": 0.23017543859649123, + "grad_norm": 0.9400249719619751, + "learning_rate": 4.974503637344002e-05, + "loss": 0.5507, + "step": 1640 + }, + { + "epoch": 0.23157894736842105, + "grad_norm": 0.7801631093025208, + "learning_rate": 4.9741884403595135e-05, + "loss": 0.6792, + "step": 1650 + }, + { + "epoch": 0.23298245614035087, + "grad_norm": 0.8041971325874329, + "learning_rate": 4.9738713171345225e-05, + "loss": 0.616, + "step": 1660 + }, + { + "epoch": 0.2343859649122807, + "grad_norm": 0.9792094826698303, + "learning_rate": 4.9735522679159195e-05, + "loss": 0.635, + "step": 1670 + }, + { + "epoch": 0.23578947368421052, + "grad_norm": 0.8937766551971436, + "learning_rate": 4.9732312929520964e-05, + "loss": 0.6902, + "step": 1680 + }, + { + "epoch": 0.23719298245614034, + "grad_norm": 0.6050293445587158, + "learning_rate": 4.972908392492942e-05, + "loss": 0.6899, + "step": 1690 + }, + { + "epoch": 0.23859649122807017, + "grad_norm": 1.114696741104126, + "learning_rate": 4.9725835667898455e-05, + "loss": 0.7013, + "step": 1700 + }, + { + "epoch": 0.24, + "grad_norm": 0.7658337354660034, + "learning_rate": 4.972256816095695e-05, + "loss": 0.6379, + "step": 1710 + }, + { + "epoch": 0.24140350877192981, + "grad_norm": 1.0719423294067383, + "learning_rate": 4.971928140664878e-05, + "loss": 0.6819, + "step": 1720 + }, + { + "epoch": 0.24280701754385964, + "grad_norm": 0.5609824061393738, + "learning_rate": 4.971597540753279e-05, + "loss": 0.6888, + "step": 1730 + }, + { + "epoch": 0.24421052631578946, + "grad_norm": 0.8473712205886841, + "learning_rate": 4.971265016618281e-05, + "loss": 0.6761, + "step": 1740 + }, + { + "epoch": 0.24561403508771928, + "grad_norm": 1.023040533065796, + "learning_rate": 4.970930568518765e-05, + "loss": 0.5544, + "step": 1750 + }, + { + "epoch": 0.24701754385964914, + "grad_norm": 1.2763292789459229, + "learning_rate": 4.97059419671511e-05, + "loss": 0.7072, + "step": 1760 + }, + { + "epoch": 0.24842105263157896, + "grad_norm": 0.9501249194145203, + "learning_rate": 4.9702559014691965e-05, + "loss": 0.5992, + "step": 1770 + }, + { + "epoch": 0.24982456140350878, + "grad_norm": 0.714192271232605, + "learning_rate": 4.969915683044395e-05, + "loss": 0.6277, + "step": 1780 + }, + { + "epoch": 0.2512280701754386, + "grad_norm": 0.8613963723182678, + "learning_rate": 4.9695735417055776e-05, + "loss": 0.5501, + "step": 1790 + }, + { + "epoch": 0.25263157894736843, + "grad_norm": 0.7384011149406433, + "learning_rate": 4.969229477719116e-05, + "loss": 0.7619, + "step": 1800 + }, + { + "epoch": 0.2540350877192982, + "grad_norm": 0.8516148328781128, + "learning_rate": 4.9688834913528724e-05, + "loss": 0.6706, + "step": 1810 + }, + { + "epoch": 0.2554385964912281, + "grad_norm": 0.9726106524467468, + "learning_rate": 4.9685355828762115e-05, + "loss": 0.6825, + "step": 1820 + }, + { + "epoch": 0.25684210526315787, + "grad_norm": 0.9834999442100525, + "learning_rate": 4.96818575255999e-05, + "loss": 0.7195, + "step": 1830 + }, + { + "epoch": 0.2582456140350877, + "grad_norm": 0.6964922547340393, + "learning_rate": 4.967834000676564e-05, + "loss": 0.6196, + "step": 1840 + }, + { + "epoch": 0.2596491228070175, + "grad_norm": 1.0819238424301147, + "learning_rate": 4.967480327499785e-05, + "loss": 0.5768, + "step": 1850 + }, + { + "epoch": 0.26105263157894737, + "grad_norm": 0.7200153470039368, + "learning_rate": 4.9671247333049975e-05, + "loss": 0.6484, + "step": 1860 + }, + { + "epoch": 0.2624561403508772, + "grad_norm": 0.6098335385322571, + "learning_rate": 4.966767218369046e-05, + "loss": 0.6132, + "step": 1870 + }, + { + "epoch": 0.263859649122807, + "grad_norm": 1.1508702039718628, + "learning_rate": 4.966407782970267e-05, + "loss": 0.6435, + "step": 1880 + }, + { + "epoch": 0.26526315789473687, + "grad_norm": 0.9164888858795166, + "learning_rate": 4.966046427388494e-05, + "loss": 0.6581, + "step": 1890 + }, + { + "epoch": 0.26666666666666666, + "grad_norm": 0.7689521908760071, + "learning_rate": 4.965683151905054e-05, + "loss": 0.593, + "step": 1900 + }, + { + "epoch": 0.2680701754385965, + "grad_norm": 1.2253938913345337, + "learning_rate": 4.965317956802769e-05, + "loss": 0.7037, + "step": 1910 + }, + { + "epoch": 0.2694736842105263, + "grad_norm": 1.2759559154510498, + "learning_rate": 4.964950842365957e-05, + "loss": 0.7054, + "step": 1920 + }, + { + "epoch": 0.27087719298245616, + "grad_norm": 1.0961602926254272, + "learning_rate": 4.9645818088804284e-05, + "loss": 0.6463, + "step": 1930 + }, + { + "epoch": 0.27228070175438596, + "grad_norm": 1.0374549627304077, + "learning_rate": 4.964210856633489e-05, + "loss": 0.7222, + "step": 1940 + }, + { + "epoch": 0.2736842105263158, + "grad_norm": 1.228814721107483, + "learning_rate": 4.963837985913938e-05, + "loss": 0.7659, + "step": 1950 + }, + { + "epoch": 0.2750877192982456, + "grad_norm": 1.131882667541504, + "learning_rate": 4.963463197012067e-05, + "loss": 0.7388, + "step": 1960 + }, + { + "epoch": 0.27649122807017545, + "grad_norm": 0.6964682340621948, + "learning_rate": 4.9630864902196626e-05, + "loss": 0.6961, + "step": 1970 + }, + { + "epoch": 0.27789473684210525, + "grad_norm": 0.6383505463600159, + "learning_rate": 4.962707865830004e-05, + "loss": 0.5755, + "step": 1980 + }, + { + "epoch": 0.2792982456140351, + "grad_norm": 0.9402531981468201, + "learning_rate": 4.9623273241378636e-05, + "loss": 0.6845, + "step": 1990 + }, + { + "epoch": 0.2807017543859649, + "grad_norm": 0.8488597273826599, + "learning_rate": 4.9619448654395055e-05, + "loss": 0.7236, + "step": 2000 + }, + { + "epoch": 0.2807017543859649, + "eval_loss": 0.6709622740745544, + "eval_runtime": 44.146, + "eval_samples_per_second": 33.978, + "eval_steps_per_second": 8.495, + "step": 2000 + }, + { + "epoch": 0.28210526315789475, + "grad_norm": 1.1073328256607056, + "learning_rate": 4.9615604900326875e-05, + "loss": 0.5944, + "step": 2010 + }, + { + "epoch": 0.28350877192982454, + "grad_norm": 1.3910387754440308, + "learning_rate": 4.961174198216658e-05, + "loss": 0.6174, + "step": 2020 + }, + { + "epoch": 0.2849122807017544, + "grad_norm": 0.698826253414154, + "learning_rate": 4.9607859902921595e-05, + "loss": 0.6801, + "step": 2030 + }, + { + "epoch": 0.2863157894736842, + "grad_norm": 1.118665099143982, + "learning_rate": 4.960395866561425e-05, + "loss": 0.6657, + "step": 2040 + }, + { + "epoch": 0.28771929824561404, + "grad_norm": 1.1043261289596558, + "learning_rate": 4.960003827328179e-05, + "loss": 0.6536, + "step": 2050 + }, + { + "epoch": 0.28912280701754384, + "grad_norm": 0.7518707513809204, + "learning_rate": 4.959609872897637e-05, + "loss": 0.6361, + "step": 2060 + }, + { + "epoch": 0.2905263157894737, + "grad_norm": 1.0390689373016357, + "learning_rate": 4.959214003576507e-05, + "loss": 0.6369, + "step": 2070 + }, + { + "epoch": 0.2919298245614035, + "grad_norm": 1.122710108757019, + "learning_rate": 4.958816219672986e-05, + "loss": 0.7563, + "step": 2080 + }, + { + "epoch": 0.29333333333333333, + "grad_norm": 0.9062842726707458, + "learning_rate": 4.9584165214967634e-05, + "loss": 0.6575, + "step": 2090 + }, + { + "epoch": 0.29473684210526313, + "grad_norm": 1.4019687175750732, + "learning_rate": 4.9580149093590165e-05, + "loss": 0.6611, + "step": 2100 + }, + { + "epoch": 0.296140350877193, + "grad_norm": 0.9323289394378662, + "learning_rate": 4.957611383572415e-05, + "loss": 0.6456, + "step": 2110 + }, + { + "epoch": 0.29754385964912283, + "grad_norm": 1.0447218418121338, + "learning_rate": 4.9572059444511175e-05, + "loss": 0.7114, + "step": 2120 + }, + { + "epoch": 0.29894736842105263, + "grad_norm": 1.3190436363220215, + "learning_rate": 4.956798592310773e-05, + "loss": 0.6338, + "step": 2130 + }, + { + "epoch": 0.3003508771929825, + "grad_norm": 0.7944990396499634, + "learning_rate": 4.956389327468518e-05, + "loss": 0.6323, + "step": 2140 + }, + { + "epoch": 0.3017543859649123, + "grad_norm": 0.9921332001686096, + "learning_rate": 4.9559781502429784e-05, + "loss": 0.6231, + "step": 2150 + }, + { + "epoch": 0.3031578947368421, + "grad_norm": 1.0437482595443726, + "learning_rate": 4.955565060954272e-05, + "loss": 0.6515, + "step": 2160 + }, + { + "epoch": 0.3045614035087719, + "grad_norm": 0.8929722309112549, + "learning_rate": 4.9551500599240006e-05, + "loss": 0.6023, + "step": 2170 + }, + { + "epoch": 0.3059649122807018, + "grad_norm": 1.1816951036453247, + "learning_rate": 4.954733147475259e-05, + "loss": 0.678, + "step": 2180 + }, + { + "epoch": 0.30736842105263157, + "grad_norm": 1.4489054679870605, + "learning_rate": 4.954314323932627e-05, + "loss": 0.6307, + "step": 2190 + }, + { + "epoch": 0.3087719298245614, + "grad_norm": 0.6073512434959412, + "learning_rate": 4.953893589622172e-05, + "loss": 0.6543, + "step": 2200 + }, + { + "epoch": 0.3101754385964912, + "grad_norm": 0.8957934975624084, + "learning_rate": 4.9534709448714514e-05, + "loss": 0.7493, + "step": 2210 + }, + { + "epoch": 0.31157894736842107, + "grad_norm": 1.1038836240768433, + "learning_rate": 4.9530463900095084e-05, + "loss": 0.6856, + "step": 2220 + }, + { + "epoch": 0.31298245614035086, + "grad_norm": 1.2374224662780762, + "learning_rate": 4.952619925366873e-05, + "loss": 0.5721, + "step": 2230 + }, + { + "epoch": 0.3143859649122807, + "grad_norm": 0.9683862924575806, + "learning_rate": 4.9521915512755635e-05, + "loss": 0.7126, + "step": 2240 + }, + { + "epoch": 0.3157894736842105, + "grad_norm": 1.096661925315857, + "learning_rate": 4.951761268069082e-05, + "loss": 0.7522, + "step": 2250 + }, + { + "epoch": 0.31719298245614036, + "grad_norm": 0.9801945686340332, + "learning_rate": 4.95132907608242e-05, + "loss": 0.6825, + "step": 2260 + }, + { + "epoch": 0.31859649122807016, + "grad_norm": 0.8269819021224976, + "learning_rate": 4.950894975652055e-05, + "loss": 0.7592, + "step": 2270 + }, + { + "epoch": 0.32, + "grad_norm": 0.7468457818031311, + "learning_rate": 4.950458967115946e-05, + "loss": 0.5719, + "step": 2280 + }, + { + "epoch": 0.3214035087719298, + "grad_norm": 0.9176953434944153, + "learning_rate": 4.9500210508135436e-05, + "loss": 0.6288, + "step": 2290 + }, + { + "epoch": 0.32280701754385965, + "grad_norm": 0.6870772838592529, + "learning_rate": 4.9495812270857786e-05, + "loss": 0.7081, + "step": 2300 + }, + { + "epoch": 0.32421052631578945, + "grad_norm": 0.8877288103103638, + "learning_rate": 4.94913949627507e-05, + "loss": 0.6371, + "step": 2310 + }, + { + "epoch": 0.3256140350877193, + "grad_norm": 0.9289653897285461, + "learning_rate": 4.9486958587253195e-05, + "loss": 0.6712, + "step": 2320 + }, + { + "epoch": 0.3270175438596491, + "grad_norm": 0.7378761172294617, + "learning_rate": 4.9482503147819156e-05, + "loss": 0.6232, + "step": 2330 + }, + { + "epoch": 0.32842105263157895, + "grad_norm": 0.7357892394065857, + "learning_rate": 4.947802864791727e-05, + "loss": 0.6519, + "step": 2340 + }, + { + "epoch": 0.3298245614035088, + "grad_norm": 1.509859323501587, + "learning_rate": 4.947353509103112e-05, + "loss": 0.7172, + "step": 2350 + }, + { + "epoch": 0.3312280701754386, + "grad_norm": 0.9467512369155884, + "learning_rate": 4.946902248065907e-05, + "loss": 0.6784, + "step": 2360 + }, + { + "epoch": 0.33263157894736844, + "grad_norm": 1.1108275651931763, + "learning_rate": 4.946449082031435e-05, + "loss": 0.612, + "step": 2370 + }, + { + "epoch": 0.33403508771929824, + "grad_norm": 1.0811039209365845, + "learning_rate": 4.9459940113525014e-05, + "loss": 0.7573, + "step": 2380 + }, + { + "epoch": 0.3354385964912281, + "grad_norm": 0.8881508708000183, + "learning_rate": 4.945537036383394e-05, + "loss": 0.7167, + "step": 2390 + }, + { + "epoch": 0.3368421052631579, + "grad_norm": 1.144106149673462, + "learning_rate": 4.945078157479884e-05, + "loss": 0.5797, + "step": 2400 + }, + { + "epoch": 0.33824561403508774, + "grad_norm": 1.1335030794143677, + "learning_rate": 4.944617374999224e-05, + "loss": 0.603, + "step": 2410 + }, + { + "epoch": 0.33964912280701753, + "grad_norm": 0.9101009368896484, + "learning_rate": 4.944154689300148e-05, + "loss": 0.6496, + "step": 2420 + }, + { + "epoch": 0.3410526315789474, + "grad_norm": 0.9584961533546448, + "learning_rate": 4.943690100742875e-05, + "loss": 0.6945, + "step": 2430 + }, + { + "epoch": 0.3424561403508772, + "grad_norm": 0.8912618160247803, + "learning_rate": 4.943223609689101e-05, + "loss": 0.6489, + "step": 2440 + }, + { + "epoch": 0.34385964912280703, + "grad_norm": 0.7363690733909607, + "learning_rate": 4.9427552165020066e-05, + "loss": 0.6066, + "step": 2450 + }, + { + "epoch": 0.3452631578947368, + "grad_norm": 1.2380393743515015, + "learning_rate": 4.9422849215462506e-05, + "loss": 0.6208, + "step": 2460 + }, + { + "epoch": 0.3466666666666667, + "grad_norm": 1.4995614290237427, + "learning_rate": 4.9418127251879756e-05, + "loss": 0.7249, + "step": 2470 + }, + { + "epoch": 0.3480701754385965, + "grad_norm": 1.0258910655975342, + "learning_rate": 4.9413386277948006e-05, + "loss": 0.7049, + "step": 2480 + }, + { + "epoch": 0.3494736842105263, + "grad_norm": 0.9672191143035889, + "learning_rate": 4.9408626297358286e-05, + "loss": 0.7138, + "step": 2490 + }, + { + "epoch": 0.3508771929824561, + "grad_norm": 0.9736180901527405, + "learning_rate": 4.940384731381639e-05, + "loss": 0.6047, + "step": 2500 + }, + { + "epoch": 0.35228070175438597, + "grad_norm": 0.7992679476737976, + "learning_rate": 4.9399049331042925e-05, + "loss": 0.6098, + "step": 2510 + }, + { + "epoch": 0.35368421052631577, + "grad_norm": 0.6984518766403198, + "learning_rate": 4.939423235277328e-05, + "loss": 0.6862, + "step": 2520 + }, + { + "epoch": 0.3550877192982456, + "grad_norm": 0.9038867354393005, + "learning_rate": 4.938939638275765e-05, + "loss": 0.7044, + "step": 2530 + }, + { + "epoch": 0.3564912280701754, + "grad_norm": 0.9274188280105591, + "learning_rate": 4.938454142476099e-05, + "loss": 0.6377, + "step": 2540 + }, + { + "epoch": 0.35789473684210527, + "grad_norm": 1.2159563302993774, + "learning_rate": 4.9379667482563066e-05, + "loss": 0.7172, + "step": 2550 + }, + { + "epoch": 0.35929824561403506, + "grad_norm": 0.8040406703948975, + "learning_rate": 4.937477455995839e-05, + "loss": 0.5563, + "step": 2560 + }, + { + "epoch": 0.3607017543859649, + "grad_norm": 0.9026057720184326, + "learning_rate": 4.9369862660756286e-05, + "loss": 0.7217, + "step": 2570 + }, + { + "epoch": 0.36210526315789476, + "grad_norm": 0.9877568483352661, + "learning_rate": 4.9364931788780835e-05, + "loss": 0.6424, + "step": 2580 + }, + { + "epoch": 0.36350877192982456, + "grad_norm": 0.8766788244247437, + "learning_rate": 4.9359981947870874e-05, + "loss": 0.6449, + "step": 2590 + }, + { + "epoch": 0.3649122807017544, + "grad_norm": 1.3229867219924927, + "learning_rate": 4.9355013141880045e-05, + "loss": 0.7963, + "step": 2600 + }, + { + "epoch": 0.3663157894736842, + "grad_norm": 1.6900445222854614, + "learning_rate": 4.9350025374676725e-05, + "loss": 0.716, + "step": 2610 + }, + { + "epoch": 0.36771929824561406, + "grad_norm": 1.055550217628479, + "learning_rate": 4.934501865014405e-05, + "loss": 0.5228, + "step": 2620 + }, + { + "epoch": 0.36912280701754385, + "grad_norm": 0.8242397904396057, + "learning_rate": 4.933999297217994e-05, + "loss": 0.6206, + "step": 2630 + }, + { + "epoch": 0.3705263157894737, + "grad_norm": 0.9964637756347656, + "learning_rate": 4.933494834469706e-05, + "loss": 0.6324, + "step": 2640 + }, + { + "epoch": 0.3719298245614035, + "grad_norm": 1.4224967956542969, + "learning_rate": 4.9329884771622817e-05, + "loss": 0.7658, + "step": 2650 + }, + { + "epoch": 0.37333333333333335, + "grad_norm": 1.1131879091262817, + "learning_rate": 4.9324802256899385e-05, + "loss": 0.6711, + "step": 2660 + }, + { + "epoch": 0.37473684210526315, + "grad_norm": 0.7792202830314636, + "learning_rate": 4.931970080448366e-05, + "loss": 0.5751, + "step": 2670 + }, + { + "epoch": 0.376140350877193, + "grad_norm": 0.7242644429206848, + "learning_rate": 4.931458041834731e-05, + "loss": 0.6772, + "step": 2680 + }, + { + "epoch": 0.3775438596491228, + "grad_norm": 0.8322226405143738, + "learning_rate": 4.9309441102476734e-05, + "loss": 0.6141, + "step": 2690 + }, + { + "epoch": 0.37894736842105264, + "grad_norm": 1.1265790462493896, + "learning_rate": 4.930428286087306e-05, + "loss": 0.627, + "step": 2700 + }, + { + "epoch": 0.38035087719298244, + "grad_norm": 1.1249980926513672, + "learning_rate": 4.929910569755215e-05, + "loss": 0.6991, + "step": 2710 + }, + { + "epoch": 0.3817543859649123, + "grad_norm": 1.5213415622711182, + "learning_rate": 4.929390961654462e-05, + "loss": 0.6379, + "step": 2720 + }, + { + "epoch": 0.3831578947368421, + "grad_norm": 0.9948049783706665, + "learning_rate": 4.9288694621895776e-05, + "loss": 0.673, + "step": 2730 + }, + { + "epoch": 0.38456140350877194, + "grad_norm": 1.249971866607666, + "learning_rate": 4.928346071766569e-05, + "loss": 0.6562, + "step": 2740 + }, + { + "epoch": 0.38596491228070173, + "grad_norm": 1.5983259677886963, + "learning_rate": 4.927820790792912e-05, + "loss": 0.6517, + "step": 2750 + }, + { + "epoch": 0.3873684210526316, + "grad_norm": 1.1207720041275024, + "learning_rate": 4.9272936196775565e-05, + "loss": 0.6506, + "step": 2760 + }, + { + "epoch": 0.3887719298245614, + "grad_norm": 1.2459056377410889, + "learning_rate": 4.926764558830923e-05, + "loss": 0.7087, + "step": 2770 + }, + { + "epoch": 0.39017543859649123, + "grad_norm": 2.858981132507324, + "learning_rate": 4.926233608664904e-05, + "loss": 0.6409, + "step": 2780 + }, + { + "epoch": 0.391578947368421, + "grad_norm": 1.2133064270019531, + "learning_rate": 4.9257007695928624e-05, + "loss": 0.7131, + "step": 2790 + }, + { + "epoch": 0.3929824561403509, + "grad_norm": 1.264398455619812, + "learning_rate": 4.925166042029631e-05, + "loss": 0.7967, + "step": 2800 + }, + { + "epoch": 0.39438596491228073, + "grad_norm": 0.8172046542167664, + "learning_rate": 4.924629426391515e-05, + "loss": 0.6273, + "step": 2810 + }, + { + "epoch": 0.3957894736842105, + "grad_norm": 0.5741508603096008, + "learning_rate": 4.924090923096286e-05, + "loss": 0.6419, + "step": 2820 + }, + { + "epoch": 0.3971929824561404, + "grad_norm": 0.8728544116020203, + "learning_rate": 4.923550532563189e-05, + "loss": 0.6296, + "step": 2830 + }, + { + "epoch": 0.39859649122807017, + "grad_norm": 0.6913738250732422, + "learning_rate": 4.923008255212935e-05, + "loss": 0.6323, + "step": 2840 + }, + { + "epoch": 0.4, + "grad_norm": 1.0395629405975342, + "learning_rate": 4.922464091467707e-05, + "loss": 0.6613, + "step": 2850 + }, + { + "epoch": 0.4014035087719298, + "grad_norm": 0.6149466633796692, + "learning_rate": 4.921918041751155e-05, + "loss": 0.6119, + "step": 2860 + }, + { + "epoch": 0.40280701754385967, + "grad_norm": 0.8594980239868164, + "learning_rate": 4.9213701064883966e-05, + "loss": 0.6575, + "step": 2870 + }, + { + "epoch": 0.40421052631578946, + "grad_norm": 1.0025339126586914, + "learning_rate": 4.9208202861060185e-05, + "loss": 0.7369, + "step": 2880 + }, + { + "epoch": 0.4056140350877193, + "grad_norm": 1.1241748332977295, + "learning_rate": 4.920268581032074e-05, + "loss": 0.6551, + "step": 2890 + }, + { + "epoch": 0.4070175438596491, + "grad_norm": 0.7128563523292542, + "learning_rate": 4.919714991696086e-05, + "loss": 0.6584, + "step": 2900 + }, + { + "epoch": 0.40842105263157896, + "grad_norm": 0.5740714073181152, + "learning_rate": 4.9191595185290414e-05, + "loss": 0.6674, + "step": 2910 + }, + { + "epoch": 0.40982456140350876, + "grad_norm": 0.9508911371231079, + "learning_rate": 4.918602161963396e-05, + "loss": 0.7091, + "step": 2920 + }, + { + "epoch": 0.4112280701754386, + "grad_norm": 1.1646149158477783, + "learning_rate": 4.9180429224330706e-05, + "loss": 0.5862, + "step": 2930 + }, + { + "epoch": 0.4126315789473684, + "grad_norm": 1.2261298894882202, + "learning_rate": 4.917481800373451e-05, + "loss": 0.731, + "step": 2940 + }, + { + "epoch": 0.41403508771929826, + "grad_norm": 0.6014220714569092, + "learning_rate": 4.916918796221393e-05, + "loss": 0.6716, + "step": 2950 + }, + { + "epoch": 0.41543859649122805, + "grad_norm": 1.0764710903167725, + "learning_rate": 4.9163539104152124e-05, + "loss": 0.6427, + "step": 2960 + }, + { + "epoch": 0.4168421052631579, + "grad_norm": 0.7629368901252747, + "learning_rate": 4.9157871433946925e-05, + "loss": 0.6184, + "step": 2970 + }, + { + "epoch": 0.4182456140350877, + "grad_norm": 0.8151566982269287, + "learning_rate": 4.9152184956010813e-05, + "loss": 0.6208, + "step": 2980 + }, + { + "epoch": 0.41964912280701755, + "grad_norm": 1.4884957075119019, + "learning_rate": 4.91464796747709e-05, + "loss": 0.6517, + "step": 2990 + }, + { + "epoch": 0.42105263157894735, + "grad_norm": 0.6417763233184814, + "learning_rate": 4.914075559466895e-05, + "loss": 0.6656, + "step": 3000 + }, + { + "epoch": 0.4224561403508772, + "grad_norm": 0.8164128065109253, + "learning_rate": 4.913501272016135e-05, + "loss": 0.6605, + "step": 3010 + }, + { + "epoch": 0.423859649122807, + "grad_norm": 0.9845851063728333, + "learning_rate": 4.9129251055719125e-05, + "loss": 0.6348, + "step": 3020 + }, + { + "epoch": 0.42526315789473684, + "grad_norm": 0.7174735069274902, + "learning_rate": 4.912347060582793e-05, + "loss": 0.6735, + "step": 3030 + }, + { + "epoch": 0.4266666666666667, + "grad_norm": 1.0722357034683228, + "learning_rate": 4.911767137498805e-05, + "loss": 0.604, + "step": 3040 + }, + { + "epoch": 0.4280701754385965, + "grad_norm": 0.8019692897796631, + "learning_rate": 4.911185336771437e-05, + "loss": 0.659, + "step": 3050 + }, + { + "epoch": 0.42947368421052634, + "grad_norm": 0.6950979232788086, + "learning_rate": 4.910601658853642e-05, + "loss": 0.6545, + "step": 3060 + }, + { + "epoch": 0.43087719298245614, + "grad_norm": 1.0000766515731812, + "learning_rate": 4.910016104199833e-05, + "loss": 0.6057, + "step": 3070 + }, + { + "epoch": 0.432280701754386, + "grad_norm": 1.0175904035568237, + "learning_rate": 4.909428673265884e-05, + "loss": 0.5503, + "step": 3080 + }, + { + "epoch": 0.4336842105263158, + "grad_norm": 1.158728003501892, + "learning_rate": 4.90883936650913e-05, + "loss": 0.6534, + "step": 3090 + }, + { + "epoch": 0.43508771929824563, + "grad_norm": 0.9984928369522095, + "learning_rate": 4.908248184388367e-05, + "loss": 0.6696, + "step": 3100 + }, + { + "epoch": 0.43649122807017543, + "grad_norm": 0.8490105867385864, + "learning_rate": 4.90765512736385e-05, + "loss": 0.5936, + "step": 3110 + }, + { + "epoch": 0.4378947368421053, + "grad_norm": 1.14065420627594, + "learning_rate": 4.907060195897296e-05, + "loss": 0.6154, + "step": 3120 + }, + { + "epoch": 0.4392982456140351, + "grad_norm": 1.0342949628829956, + "learning_rate": 4.906463390451878e-05, + "loss": 0.7975, + "step": 3130 + }, + { + "epoch": 0.44070175438596493, + "grad_norm": 1.2673470973968506, + "learning_rate": 4.9058647114922286e-05, + "loss": 0.6742, + "step": 3140 + }, + { + "epoch": 0.4421052631578947, + "grad_norm": 0.7902513146400452, + "learning_rate": 4.9052641594844416e-05, + "loss": 0.6221, + "step": 3150 + }, + { + "epoch": 0.4435087719298246, + "grad_norm": 0.813940167427063, + "learning_rate": 4.9046617348960666e-05, + "loss": 0.7789, + "step": 3160 + }, + { + "epoch": 0.44491228070175437, + "grad_norm": 0.9385407567024231, + "learning_rate": 4.904057438196111e-05, + "loss": 0.6668, + "step": 3170 + }, + { + "epoch": 0.4463157894736842, + "grad_norm": 1.005690574645996, + "learning_rate": 4.903451269855043e-05, + "loss": 0.6732, + "step": 3180 + }, + { + "epoch": 0.447719298245614, + "grad_norm": 1.1185845136642456, + "learning_rate": 4.9028432303447826e-05, + "loss": 0.652, + "step": 3190 + }, + { + "epoch": 0.44912280701754387, + "grad_norm": 1.234397292137146, + "learning_rate": 4.902233320138711e-05, + "loss": 0.7734, + "step": 3200 + }, + { + "epoch": 0.45052631578947366, + "grad_norm": 0.75343918800354, + "learning_rate": 4.901621539711664e-05, + "loss": 0.6524, + "step": 3210 + }, + { + "epoch": 0.4519298245614035, + "grad_norm": 0.7265051603317261, + "learning_rate": 4.901007889539933e-05, + "loss": 0.5631, + "step": 3220 + }, + { + "epoch": 0.4533333333333333, + "grad_norm": 0.8995214700698853, + "learning_rate": 4.900392370101266e-05, + "loss": 0.6949, + "step": 3230 + }, + { + "epoch": 0.45473684210526316, + "grad_norm": 1.1753424406051636, + "learning_rate": 4.899774981874867e-05, + "loss": 0.7445, + "step": 3240 + }, + { + "epoch": 0.45614035087719296, + "grad_norm": 0.9139629602432251, + "learning_rate": 4.8991557253413924e-05, + "loss": 0.6329, + "step": 3250 + }, + { + "epoch": 0.4575438596491228, + "grad_norm": 1.145979881286621, + "learning_rate": 4.8985346009829546e-05, + "loss": 0.6808, + "step": 3260 + }, + { + "epoch": 0.4589473684210526, + "grad_norm": 0.5931209921836853, + "learning_rate": 4.8979116092831223e-05, + "loss": 0.6464, + "step": 3270 + }, + { + "epoch": 0.46035087719298246, + "grad_norm": 0.9794625639915466, + "learning_rate": 4.897286750726913e-05, + "loss": 0.6997, + "step": 3280 + }, + { + "epoch": 0.4617543859649123, + "grad_norm": 1.121286153793335, + "learning_rate": 4.8966600258008024e-05, + "loss": 0.642, + "step": 3290 + }, + { + "epoch": 0.4631578947368421, + "grad_norm": 0.8356245160102844, + "learning_rate": 4.896031434992717e-05, + "loss": 0.651, + "step": 3300 + }, + { + "epoch": 0.46456140350877195, + "grad_norm": 0.8175771832466125, + "learning_rate": 4.8954009787920365e-05, + "loss": 0.6899, + "step": 3310 + }, + { + "epoch": 0.46596491228070175, + "grad_norm": 1.083617925643921, + "learning_rate": 4.894768657689592e-05, + "loss": 0.7559, + "step": 3320 + }, + { + "epoch": 0.4673684210526316, + "grad_norm": 1.0625582933425903, + "learning_rate": 4.8941344721776675e-05, + "loss": 0.6473, + "step": 3330 + }, + { + "epoch": 0.4687719298245614, + "grad_norm": 0.9211772680282593, + "learning_rate": 4.893498422749997e-05, + "loss": 0.726, + "step": 3340 + }, + { + "epoch": 0.47017543859649125, + "grad_norm": 0.8347317576408386, + "learning_rate": 4.8928605099017696e-05, + "loss": 0.6, + "step": 3350 + }, + { + "epoch": 0.47157894736842104, + "grad_norm": 1.115190029144287, + "learning_rate": 4.89222073412962e-05, + "loss": 0.6202, + "step": 3360 + }, + { + "epoch": 0.4729824561403509, + "grad_norm": 1.101366400718689, + "learning_rate": 4.8915790959316356e-05, + "loss": 0.6717, + "step": 3370 + }, + { + "epoch": 0.4743859649122807, + "grad_norm": 0.7661691308021545, + "learning_rate": 4.890935595807355e-05, + "loss": 0.6328, + "step": 3380 + }, + { + "epoch": 0.47578947368421054, + "grad_norm": 0.8245850205421448, + "learning_rate": 4.890290234257764e-05, + "loss": 0.7271, + "step": 3390 + }, + { + "epoch": 0.47719298245614034, + "grad_norm": 1.0110929012298584, + "learning_rate": 4.889643011785299e-05, + "loss": 0.582, + "step": 3400 + }, + { + "epoch": 0.4785964912280702, + "grad_norm": 0.7848758697509766, + "learning_rate": 4.888993928893846e-05, + "loss": 0.6851, + "step": 3410 + }, + { + "epoch": 0.48, + "grad_norm": 0.7310847640037537, + "learning_rate": 4.888342986088736e-05, + "loss": 0.583, + "step": 3420 + }, + { + "epoch": 0.48140350877192983, + "grad_norm": 1.3532679080963135, + "learning_rate": 4.887690183876752e-05, + "loss": 0.6261, + "step": 3430 + }, + { + "epoch": 0.48280701754385963, + "grad_norm": 1.0199493169784546, + "learning_rate": 4.887035522766122e-05, + "loss": 0.6563, + "step": 3440 + }, + { + "epoch": 0.4842105263157895, + "grad_norm": 0.7497562766075134, + "learning_rate": 4.886379003266523e-05, + "loss": 0.5178, + "step": 3450 + }, + { + "epoch": 0.4856140350877193, + "grad_norm": 0.8139173984527588, + "learning_rate": 4.885720625889078e-05, + "loss": 0.824, + "step": 3460 + }, + { + "epoch": 0.4870175438596491, + "grad_norm": 0.6662510633468628, + "learning_rate": 4.8850603911463556e-05, + "loss": 0.6821, + "step": 3470 + }, + { + "epoch": 0.4884210526315789, + "grad_norm": 0.9491138458251953, + "learning_rate": 4.8843982995523704e-05, + "loss": 0.6955, + "step": 3480 + }, + { + "epoch": 0.4898245614035088, + "grad_norm": 0.7988129258155823, + "learning_rate": 4.883734351622586e-05, + "loss": 0.6447, + "step": 3490 + }, + { + "epoch": 0.49122807017543857, + "grad_norm": 1.0620664358139038, + "learning_rate": 4.8830685478739057e-05, + "loss": 0.6454, + "step": 3500 + }, + { + "epoch": 0.4926315789473684, + "grad_norm": 0.7549204230308533, + "learning_rate": 4.8824008888246834e-05, + "loss": 0.6676, + "step": 3510 + }, + { + "epoch": 0.49403508771929827, + "grad_norm": 0.9870264530181885, + "learning_rate": 4.8817313749947115e-05, + "loss": 0.5715, + "step": 3520 + }, + { + "epoch": 0.49543859649122807, + "grad_norm": 0.7582098245620728, + "learning_rate": 4.881060006905232e-05, + "loss": 0.6479, + "step": 3530 + }, + { + "epoch": 0.4968421052631579, + "grad_norm": 0.6988912224769592, + "learning_rate": 4.880386785078925e-05, + "loss": 0.6208, + "step": 3540 + }, + { + "epoch": 0.4982456140350877, + "grad_norm": 0.7568824291229248, + "learning_rate": 4.87971171003992e-05, + "loss": 0.6503, + "step": 3550 + }, + { + "epoch": 0.49964912280701756, + "grad_norm": 1.2903584241867065, + "learning_rate": 4.879034782313786e-05, + "loss": 0.6525, + "step": 3560 + }, + { + "epoch": 0.5010526315789474, + "grad_norm": 0.7582905888557434, + "learning_rate": 4.878356002427532e-05, + "loss": 0.633, + "step": 3570 + }, + { + "epoch": 0.5024561403508772, + "grad_norm": 0.9976963400840759, + "learning_rate": 4.877675370909612e-05, + "loss": 0.6184, + "step": 3580 + }, + { + "epoch": 0.503859649122807, + "grad_norm": 0.8688436150550842, + "learning_rate": 4.876992888289923e-05, + "loss": 0.64, + "step": 3590 + }, + { + "epoch": 0.5052631578947369, + "grad_norm": 1.4120594263076782, + "learning_rate": 4.876308555099799e-05, + "loss": 0.6238, + "step": 3600 + }, + { + "epoch": 0.5066666666666667, + "grad_norm": 1.190382719039917, + "learning_rate": 4.875622371872017e-05, + "loss": 0.7433, + "step": 3610 + }, + { + "epoch": 0.5080701754385964, + "grad_norm": 0.8115689754486084, + "learning_rate": 4.874934339140795e-05, + "loss": 0.7031, + "step": 3620 + }, + { + "epoch": 0.5094736842105263, + "grad_norm": 0.8457335233688354, + "learning_rate": 4.8742444574417904e-05, + "loss": 0.5443, + "step": 3630 + }, + { + "epoch": 0.5108771929824562, + "grad_norm": 0.8754384517669678, + "learning_rate": 4.873552727312099e-05, + "loss": 0.6728, + "step": 3640 + }, + { + "epoch": 0.512280701754386, + "grad_norm": 1.2087777853012085, + "learning_rate": 4.872859149290256e-05, + "loss": 0.6321, + "step": 3650 + }, + { + "epoch": 0.5136842105263157, + "grad_norm": 1.0635002851486206, + "learning_rate": 4.872163723916237e-05, + "loss": 0.6301, + "step": 3660 + }, + { + "epoch": 0.5150877192982456, + "grad_norm": 1.1686186790466309, + "learning_rate": 4.871466451731453e-05, + "loss": 0.6991, + "step": 3670 + }, + { + "epoch": 0.5164912280701754, + "grad_norm": 1.1546950340270996, + "learning_rate": 4.870767333278755e-05, + "loss": 0.5503, + "step": 3680 + }, + { + "epoch": 0.5178947368421053, + "grad_norm": 0.8768120408058167, + "learning_rate": 4.87006636910243e-05, + "loss": 0.6342, + "step": 3690 + }, + { + "epoch": 0.519298245614035, + "grad_norm": 0.8353332281112671, + "learning_rate": 4.8693635597482045e-05, + "loss": 0.5933, + "step": 3700 + }, + { + "epoch": 0.5207017543859649, + "grad_norm": 0.8518616557121277, + "learning_rate": 4.868658905763238e-05, + "loss": 0.5878, + "step": 3710 + }, + { + "epoch": 0.5221052631578947, + "grad_norm": 0.8607089519500732, + "learning_rate": 4.8679524076961284e-05, + "loss": 0.5478, + "step": 3720 + }, + { + "epoch": 0.5235087719298246, + "grad_norm": 1.3177140951156616, + "learning_rate": 4.867244066096909e-05, + "loss": 0.6024, + "step": 3730 + }, + { + "epoch": 0.5249122807017544, + "grad_norm": 1.1247279644012451, + "learning_rate": 4.866533881517046e-05, + "loss": 0.6106, + "step": 3740 + }, + { + "epoch": 0.5263157894736842, + "grad_norm": 1.0166698694229126, + "learning_rate": 4.865821854509445e-05, + "loss": 0.602, + "step": 3750 + }, + { + "epoch": 0.527719298245614, + "grad_norm": 0.7537686824798584, + "learning_rate": 4.865107985628442e-05, + "loss": 0.7147, + "step": 3760 + }, + { + "epoch": 0.5291228070175439, + "grad_norm": 1.1428786516189575, + "learning_rate": 4.86439227542981e-05, + "loss": 0.6561, + "step": 3770 + }, + { + "epoch": 0.5305263157894737, + "grad_norm": 1.1645269393920898, + "learning_rate": 4.863674724470751e-05, + "loss": 0.7062, + "step": 3780 + }, + { + "epoch": 0.5319298245614035, + "grad_norm": 1.128609299659729, + "learning_rate": 4.862955333309905e-05, + "loss": 0.7019, + "step": 3790 + }, + { + "epoch": 0.5333333333333333, + "grad_norm": 1.0182465314865112, + "learning_rate": 4.8622341025073425e-05, + "loss": 0.5702, + "step": 3800 + }, + { + "epoch": 0.5347368421052632, + "grad_norm": 0.9816009998321533, + "learning_rate": 4.861511032624567e-05, + "loss": 0.6956, + "step": 3810 + }, + { + "epoch": 0.536140350877193, + "grad_norm": 0.7931702733039856, + "learning_rate": 4.860786124224512e-05, + "loss": 0.6266, + "step": 3820 + }, + { + "epoch": 0.5375438596491228, + "grad_norm": 1.3353627920150757, + "learning_rate": 4.860059377871544e-05, + "loss": 0.6758, + "step": 3830 + }, + { + "epoch": 0.5389473684210526, + "grad_norm": 1.1476149559020996, + "learning_rate": 4.85933079413146e-05, + "loss": 0.6559, + "step": 3840 + }, + { + "epoch": 0.5403508771929825, + "grad_norm": 0.9160752892494202, + "learning_rate": 4.858600373571487e-05, + "loss": 0.6052, + "step": 3850 + }, + { + "epoch": 0.5417543859649123, + "grad_norm": 1.0451756715774536, + "learning_rate": 4.8578681167602834e-05, + "loss": 0.6119, + "step": 3860 + }, + { + "epoch": 0.5431578947368421, + "grad_norm": 0.9673342108726501, + "learning_rate": 4.8571340242679354e-05, + "loss": 0.5872, + "step": 3870 + }, + { + "epoch": 0.5445614035087719, + "grad_norm": 1.24473237991333, + "learning_rate": 4.856398096665959e-05, + "loss": 0.7302, + "step": 3880 + }, + { + "epoch": 0.5459649122807018, + "grad_norm": 0.967494547367096, + "learning_rate": 4.8556603345273e-05, + "loss": 0.6889, + "step": 3890 + }, + { + "epoch": 0.5473684210526316, + "grad_norm": 0.9426731467247009, + "learning_rate": 4.8549207384263305e-05, + "loss": 0.6045, + "step": 3900 + }, + { + "epoch": 0.5487719298245614, + "grad_norm": 1.033600091934204, + "learning_rate": 4.854179308938852e-05, + "loss": 0.743, + "step": 3910 + }, + { + "epoch": 0.5501754385964912, + "grad_norm": 0.9784322381019592, + "learning_rate": 4.8534360466420926e-05, + "loss": 0.6416, + "step": 3920 + }, + { + "epoch": 0.5515789473684211, + "grad_norm": 1.0500706434249878, + "learning_rate": 4.852690952114708e-05, + "loss": 0.5975, + "step": 3930 + }, + { + "epoch": 0.5529824561403509, + "grad_norm": 1.6134823560714722, + "learning_rate": 4.851944025936779e-05, + "loss": 0.7975, + "step": 3940 + }, + { + "epoch": 0.5543859649122806, + "grad_norm": 0.785410463809967, + "learning_rate": 4.851195268689813e-05, + "loss": 0.6836, + "step": 3950 + }, + { + "epoch": 0.5557894736842105, + "grad_norm": 1.15956449508667, + "learning_rate": 4.850444680956745e-05, + "loss": 0.5265, + "step": 3960 + }, + { + "epoch": 0.5571929824561404, + "grad_norm": 1.0284963846206665, + "learning_rate": 4.8496922633219314e-05, + "loss": 0.687, + "step": 3970 + }, + { + "epoch": 0.5585964912280702, + "grad_norm": 0.5753929615020752, + "learning_rate": 4.8489380163711556e-05, + "loss": 0.5644, + "step": 3980 + }, + { + "epoch": 0.56, + "grad_norm": 1.0494047403335571, + "learning_rate": 4.848181940691625e-05, + "loss": 0.6013, + "step": 3990 + }, + { + "epoch": 0.5614035087719298, + "grad_norm": 1.090614914894104, + "learning_rate": 4.8474240368719703e-05, + "loss": 0.724, + "step": 4000 + }, + { + "epoch": 0.5614035087719298, + "eval_loss": 0.6521075963973999, + "eval_runtime": 44.1632, + "eval_samples_per_second": 33.965, + "eval_steps_per_second": 8.491, + "step": 4000 + }, + { + "epoch": 0.5628070175438596, + "grad_norm": 1.593772530555725, + "learning_rate": 4.846664305502245e-05, + "loss": 0.6668, + "step": 4010 + }, + { + "epoch": 0.5642105263157895, + "grad_norm": 1.0096566677093506, + "learning_rate": 4.8459027471739284e-05, + "loss": 0.6898, + "step": 4020 + }, + { + "epoch": 0.5656140350877193, + "grad_norm": 1.126257061958313, + "learning_rate": 4.8451393624799165e-05, + "loss": 0.6639, + "step": 4030 + }, + { + "epoch": 0.5670175438596491, + "grad_norm": 1.0839751958847046, + "learning_rate": 4.844374152014532e-05, + "loss": 0.7336, + "step": 4040 + }, + { + "epoch": 0.5684210526315789, + "grad_norm": 0.4993619918823242, + "learning_rate": 4.843607116373518e-05, + "loss": 0.6233, + "step": 4050 + }, + { + "epoch": 0.5698245614035088, + "grad_norm": 1.6385512351989746, + "learning_rate": 4.8428382561540366e-05, + "loss": 0.6178, + "step": 4060 + }, + { + "epoch": 0.5712280701754386, + "grad_norm": 0.9295198321342468, + "learning_rate": 4.8420675719546723e-05, + "loss": 0.6121, + "step": 4070 + }, + { + "epoch": 0.5726315789473684, + "grad_norm": 1.2179811000823975, + "learning_rate": 4.8412950643754305e-05, + "loss": 0.6225, + "step": 4080 + }, + { + "epoch": 0.5740350877192982, + "grad_norm": 1.1477456092834473, + "learning_rate": 4.840520734017734e-05, + "loss": 0.6502, + "step": 4090 + }, + { + "epoch": 0.5754385964912281, + "grad_norm": 0.8792319297790527, + "learning_rate": 4.839744581484425e-05, + "loss": 0.6799, + "step": 4100 + }, + { + "epoch": 0.5768421052631579, + "grad_norm": 1.995977759361267, + "learning_rate": 4.8389666073797646e-05, + "loss": 0.7671, + "step": 4110 + }, + { + "epoch": 0.5782456140350877, + "grad_norm": 0.680174708366394, + "learning_rate": 4.8381868123094335e-05, + "loss": 0.6289, + "step": 4120 + }, + { + "epoch": 0.5796491228070175, + "grad_norm": 0.8312070369720459, + "learning_rate": 4.837405196880529e-05, + "loss": 0.6621, + "step": 4130 + }, + { + "epoch": 0.5810526315789474, + "grad_norm": 0.8448961973190308, + "learning_rate": 4.836621761701564e-05, + "loss": 0.601, + "step": 4140 + }, + { + "epoch": 0.5824561403508772, + "grad_norm": 1.1311395168304443, + "learning_rate": 4.835836507382471e-05, + "loss": 0.6818, + "step": 4150 + }, + { + "epoch": 0.583859649122807, + "grad_norm": 0.8135958313941956, + "learning_rate": 4.835049434534596e-05, + "loss": 0.6688, + "step": 4160 + }, + { + "epoch": 0.5852631578947368, + "grad_norm": 0.9292672276496887, + "learning_rate": 4.8342605437707034e-05, + "loss": 0.7652, + "step": 4170 + }, + { + "epoch": 0.5866666666666667, + "grad_norm": 1.1490682363510132, + "learning_rate": 4.8334698357049715e-05, + "loss": 0.5381, + "step": 4180 + }, + { + "epoch": 0.5880701754385965, + "grad_norm": 1.1863840818405151, + "learning_rate": 4.832677310952993e-05, + "loss": 0.6786, + "step": 4190 + }, + { + "epoch": 0.5894736842105263, + "grad_norm": 0.7175789475440979, + "learning_rate": 4.831882970131777e-05, + "loss": 0.629, + "step": 4200 + }, + { + "epoch": 0.5908771929824561, + "grad_norm": 0.898485541343689, + "learning_rate": 4.831086813859743e-05, + "loss": 0.6021, + "step": 4210 + }, + { + "epoch": 0.592280701754386, + "grad_norm": 1.0772299766540527, + "learning_rate": 4.830288842756728e-05, + "loss": 0.5706, + "step": 4220 + }, + { + "epoch": 0.5936842105263158, + "grad_norm": 0.8830444812774658, + "learning_rate": 4.8294890574439784e-05, + "loss": 0.6716, + "step": 4230 + }, + { + "epoch": 0.5950877192982457, + "grad_norm": 1.12392258644104, + "learning_rate": 4.828687458544155e-05, + "loss": 0.6315, + "step": 4240 + }, + { + "epoch": 0.5964912280701754, + "grad_norm": 1.751460075378418, + "learning_rate": 4.82788404668133e-05, + "loss": 0.5633, + "step": 4250 + }, + { + "epoch": 0.5978947368421053, + "grad_norm": 1.024601697921753, + "learning_rate": 4.827078822480987e-05, + "loss": 0.6747, + "step": 4260 + }, + { + "epoch": 0.5992982456140351, + "grad_norm": 0.8278754949569702, + "learning_rate": 4.826271786570021e-05, + "loss": 0.6555, + "step": 4270 + }, + { + "epoch": 0.600701754385965, + "grad_norm": 0.9836990237236023, + "learning_rate": 4.825462939576737e-05, + "loss": 0.5987, + "step": 4280 + }, + { + "epoch": 0.6021052631578947, + "grad_norm": 0.5657834410667419, + "learning_rate": 4.8246522821308495e-05, + "loss": 0.6753, + "step": 4290 + }, + { + "epoch": 0.6035087719298246, + "grad_norm": 1.1341723203659058, + "learning_rate": 4.823839814863484e-05, + "loss": 0.5856, + "step": 4300 + }, + { + "epoch": 0.6049122807017544, + "grad_norm": 1.311997652053833, + "learning_rate": 4.823025538407173e-05, + "loss": 0.6204, + "step": 4310 + }, + { + "epoch": 0.6063157894736843, + "grad_norm": 0.8703358173370361, + "learning_rate": 4.82220945339586e-05, + "loss": 0.5866, + "step": 4320 + }, + { + "epoch": 0.607719298245614, + "grad_norm": 0.8117982149124146, + "learning_rate": 4.8213915604648944e-05, + "loss": 0.7384, + "step": 4330 + }, + { + "epoch": 0.6091228070175438, + "grad_norm": 1.2093411684036255, + "learning_rate": 4.820571860251034e-05, + "loss": 0.7113, + "step": 4340 + }, + { + "epoch": 0.6105263157894737, + "grad_norm": 0.895978569984436, + "learning_rate": 4.819750353392443e-05, + "loss": 0.6544, + "step": 4350 + }, + { + "epoch": 0.6119298245614035, + "grad_norm": 0.8177430629730225, + "learning_rate": 4.818927040528693e-05, + "loss": 0.6317, + "step": 4360 + }, + { + "epoch": 0.6133333333333333, + "grad_norm": 0.8065016865730286, + "learning_rate": 4.818101922300762e-05, + "loss": 0.5756, + "step": 4370 + }, + { + "epoch": 0.6147368421052631, + "grad_norm": 0.9234448075294495, + "learning_rate": 4.8172749993510315e-05, + "loss": 0.687, + "step": 4380 + }, + { + "epoch": 0.616140350877193, + "grad_norm": 1.0152438879013062, + "learning_rate": 4.81644627232329e-05, + "loss": 0.6573, + "step": 4390 + }, + { + "epoch": 0.6175438596491228, + "grad_norm": 0.8767795562744141, + "learning_rate": 4.81561574186273e-05, + "loss": 0.6787, + "step": 4400 + }, + { + "epoch": 0.6189473684210526, + "grad_norm": 0.8680139183998108, + "learning_rate": 4.814783408615948e-05, + "loss": 0.5503, + "step": 4410 + }, + { + "epoch": 0.6203508771929824, + "grad_norm": 0.9502211213111877, + "learning_rate": 4.813949273230944e-05, + "loss": 0.6495, + "step": 4420 + }, + { + "epoch": 0.6217543859649123, + "grad_norm": 0.8180057406425476, + "learning_rate": 4.8131133363571214e-05, + "loss": 0.5845, + "step": 4430 + }, + { + "epoch": 0.6231578947368421, + "grad_norm": 1.3863866329193115, + "learning_rate": 4.8122755986452845e-05, + "loss": 0.6093, + "step": 4440 + }, + { + "epoch": 0.624561403508772, + "grad_norm": 0.7499920129776001, + "learning_rate": 4.8114360607476416e-05, + "loss": 0.6465, + "step": 4450 + }, + { + "epoch": 0.6259649122807017, + "grad_norm": 0.7183496952056885, + "learning_rate": 4.810594723317801e-05, + "loss": 0.6228, + "step": 4460 + }, + { + "epoch": 0.6273684210526316, + "grad_norm": 1.3374441862106323, + "learning_rate": 4.809751587010774e-05, + "loss": 0.657, + "step": 4470 + }, + { + "epoch": 0.6287719298245614, + "grad_norm": 0.8970227837562561, + "learning_rate": 4.80890665248297e-05, + "loss": 0.6068, + "step": 4480 + }, + { + "epoch": 0.6301754385964913, + "grad_norm": 1.075203537940979, + "learning_rate": 4.808059920392201e-05, + "loss": 0.7177, + "step": 4490 + }, + { + "epoch": 0.631578947368421, + "grad_norm": 1.3227583169937134, + "learning_rate": 4.807211391397674e-05, + "loss": 0.6333, + "step": 4500 + }, + { + "epoch": 0.6329824561403509, + "grad_norm": 0.8684366941452026, + "learning_rate": 4.806361066160001e-05, + "loss": 0.6396, + "step": 4510 + }, + { + "epoch": 0.6343859649122807, + "grad_norm": 0.9884424209594727, + "learning_rate": 4.8055089453411875e-05, + "loss": 0.622, + "step": 4520 + }, + { + "epoch": 0.6357894736842106, + "grad_norm": 1.2879207134246826, + "learning_rate": 4.80465502960464e-05, + "loss": 0.6551, + "step": 4530 + }, + { + "epoch": 0.6371929824561403, + "grad_norm": 0.8450446724891663, + "learning_rate": 4.80379931961516e-05, + "loss": 0.6874, + "step": 4540 + }, + { + "epoch": 0.6385964912280702, + "grad_norm": 1.0679776668548584, + "learning_rate": 4.8029418160389484e-05, + "loss": 0.5982, + "step": 4550 + }, + { + "epoch": 0.64, + "grad_norm": 0.7384183406829834, + "learning_rate": 4.8020825195435994e-05, + "loss": 0.5541, + "step": 4560 + }, + { + "epoch": 0.6414035087719299, + "grad_norm": 0.8015978336334229, + "learning_rate": 4.8012214307981064e-05, + "loss": 0.7297, + "step": 4570 + }, + { + "epoch": 0.6428070175438596, + "grad_norm": 0.7276405692100525, + "learning_rate": 4.800358550472855e-05, + "loss": 0.7694, + "step": 4580 + }, + { + "epoch": 0.6442105263157895, + "grad_norm": 0.7692060470581055, + "learning_rate": 4.799493879239628e-05, + "loss": 0.6194, + "step": 4590 + }, + { + "epoch": 0.6456140350877193, + "grad_norm": 1.2254407405853271, + "learning_rate": 4.7986274177716024e-05, + "loss": 0.6358, + "step": 4600 + }, + { + "epoch": 0.6470175438596492, + "grad_norm": 1.0495854616165161, + "learning_rate": 4.797759166743346e-05, + "loss": 0.6828, + "step": 4610 + }, + { + "epoch": 0.6484210526315789, + "grad_norm": 0.9298211932182312, + "learning_rate": 4.7968891268308246e-05, + "loss": 0.7163, + "step": 4620 + }, + { + "epoch": 0.6498245614035087, + "grad_norm": 0.9762528538703918, + "learning_rate": 4.796017298711391e-05, + "loss": 0.5935, + "step": 4630 + }, + { + "epoch": 0.6512280701754386, + "grad_norm": 1.0231860876083374, + "learning_rate": 4.795143683063797e-05, + "loss": 0.5696, + "step": 4640 + }, + { + "epoch": 0.6526315789473685, + "grad_norm": 1.1608182191848755, + "learning_rate": 4.7942682805681797e-05, + "loss": 0.5665, + "step": 4650 + }, + { + "epoch": 0.6540350877192982, + "grad_norm": 0.6527351140975952, + "learning_rate": 4.79339109190607e-05, + "loss": 0.6242, + "step": 4660 + }, + { + "epoch": 0.655438596491228, + "grad_norm": 0.8694155812263489, + "learning_rate": 4.792512117760391e-05, + "loss": 0.6259, + "step": 4670 + }, + { + "epoch": 0.6568421052631579, + "grad_norm": 0.9847631454467773, + "learning_rate": 4.7916313588154514e-05, + "loss": 0.6757, + "step": 4680 + }, + { + "epoch": 0.6582456140350877, + "grad_norm": 0.5999444127082825, + "learning_rate": 4.790748815756954e-05, + "loss": 0.6324, + "step": 4690 + }, + { + "epoch": 0.6596491228070176, + "grad_norm": 1.4817160367965698, + "learning_rate": 4.78986448927199e-05, + "loss": 0.5834, + "step": 4700 + }, + { + "epoch": 0.6610526315789473, + "grad_norm": 1.3592370748519897, + "learning_rate": 4.788978380049036e-05, + "loss": 0.6985, + "step": 4710 + }, + { + "epoch": 0.6624561403508772, + "grad_norm": 0.9479141235351562, + "learning_rate": 4.78809048877796e-05, + "loss": 0.6595, + "step": 4720 + }, + { + "epoch": 0.663859649122807, + "grad_norm": 1.3383686542510986, + "learning_rate": 4.787200816150014e-05, + "loss": 0.7508, + "step": 4730 + }, + { + "epoch": 0.6652631578947369, + "grad_norm": 1.0097548961639404, + "learning_rate": 4.786309362857839e-05, + "loss": 0.6452, + "step": 4740 + }, + { + "epoch": 0.6666666666666666, + "grad_norm": 0.9222456812858582, + "learning_rate": 4.785416129595463e-05, + "loss": 0.6171, + "step": 4750 + }, + { + "epoch": 0.6680701754385965, + "grad_norm": 0.9993833303451538, + "learning_rate": 4.784521117058298e-05, + "loss": 0.654, + "step": 4760 + }, + { + "epoch": 0.6694736842105263, + "grad_norm": 0.6470888257026672, + "learning_rate": 4.7836243259431425e-05, + "loss": 0.6674, + "step": 4770 + }, + { + "epoch": 0.6708771929824562, + "grad_norm": 0.8498440980911255, + "learning_rate": 4.7827257569481776e-05, + "loss": 0.6319, + "step": 4780 + }, + { + "epoch": 0.6722807017543859, + "grad_norm": 0.9220410585403442, + "learning_rate": 4.781825410772972e-05, + "loss": 0.5856, + "step": 4790 + }, + { + "epoch": 0.6736842105263158, + "grad_norm": 1.065016746520996, + "learning_rate": 4.780923288118475e-05, + "loss": 0.5919, + "step": 4800 + }, + { + "epoch": 0.6750877192982456, + "grad_norm": 0.7213327288627625, + "learning_rate": 4.78001938968702e-05, + "loss": 0.6192, + "step": 4810 + }, + { + "epoch": 0.6764912280701755, + "grad_norm": 0.7141574025154114, + "learning_rate": 4.779113716182323e-05, + "loss": 0.6628, + "step": 4820 + }, + { + "epoch": 0.6778947368421052, + "grad_norm": 0.7694927453994751, + "learning_rate": 4.778206268309482e-05, + "loss": 0.6451, + "step": 4830 + }, + { + "epoch": 0.6792982456140351, + "grad_norm": 1.2971090078353882, + "learning_rate": 4.777297046774977e-05, + "loss": 0.6823, + "step": 4840 + }, + { + "epoch": 0.6807017543859649, + "grad_norm": 0.7971644401550293, + "learning_rate": 4.7763860522866665e-05, + "loss": 0.6916, + "step": 4850 + }, + { + "epoch": 0.6821052631578948, + "grad_norm": 0.8853887319564819, + "learning_rate": 4.775473285553792e-05, + "loss": 0.5936, + "step": 4860 + }, + { + "epoch": 0.6835087719298245, + "grad_norm": 1.3222453594207764, + "learning_rate": 4.774558747286973e-05, + "loss": 0.7202, + "step": 4870 + }, + { + "epoch": 0.6849122807017544, + "grad_norm": 1.187171220779419, + "learning_rate": 4.77364243819821e-05, + "loss": 0.6405, + "step": 4880 + }, + { + "epoch": 0.6863157894736842, + "grad_norm": 0.8649610280990601, + "learning_rate": 4.7727243590008806e-05, + "loss": 0.6704, + "step": 4890 + }, + { + "epoch": 0.6877192982456141, + "grad_norm": 0.9361883401870728, + "learning_rate": 4.771804510409741e-05, + "loss": 0.6304, + "step": 4900 + }, + { + "epoch": 0.6891228070175439, + "grad_norm": 0.7870001196861267, + "learning_rate": 4.7708828931409236e-05, + "loss": 0.6645, + "step": 4910 + }, + { + "epoch": 0.6905263157894737, + "grad_norm": 1.0028226375579834, + "learning_rate": 4.769959507911941e-05, + "loss": 0.7018, + "step": 4920 + }, + { + "epoch": 0.6919298245614035, + "grad_norm": 0.7500180602073669, + "learning_rate": 4.769034355441678e-05, + "loss": 0.5191, + "step": 4930 + }, + { + "epoch": 0.6933333333333334, + "grad_norm": 0.9766993522644043, + "learning_rate": 4.7681074364503995e-05, + "loss": 0.6723, + "step": 4940 + }, + { + "epoch": 0.6947368421052632, + "grad_norm": 1.3899115324020386, + "learning_rate": 4.767178751659743e-05, + "loss": 0.7069, + "step": 4950 + }, + { + "epoch": 0.696140350877193, + "grad_norm": 1.3812363147735596, + "learning_rate": 4.7662483017927215e-05, + "loss": 0.6333, + "step": 4960 + }, + { + "epoch": 0.6975438596491228, + "grad_norm": 0.6967772841453552, + "learning_rate": 4.765316087573722e-05, + "loss": 0.7116, + "step": 4970 + }, + { + "epoch": 0.6989473684210527, + "grad_norm": 1.235410213470459, + "learning_rate": 4.7643821097285044e-05, + "loss": 0.5517, + "step": 4980 + }, + { + "epoch": 0.7003508771929825, + "grad_norm": 1.0389471054077148, + "learning_rate": 4.763446368984205e-05, + "loss": 0.6856, + "step": 4990 + }, + { + "epoch": 0.7017543859649122, + "grad_norm": 0.9552194476127625, + "learning_rate": 4.762508866069327e-05, + "loss": 0.6119, + "step": 5000 + }, + { + "epoch": 0.7031578947368421, + "grad_norm": 0.8866641521453857, + "learning_rate": 4.7615696017137504e-05, + "loss": 0.5645, + "step": 5010 + }, + { + "epoch": 0.7045614035087719, + "grad_norm": 1.0465891361236572, + "learning_rate": 4.760628576648723e-05, + "loss": 0.6506, + "step": 5020 + }, + { + "epoch": 0.7059649122807018, + "grad_norm": 1.104183316230774, + "learning_rate": 4.759685791606868e-05, + "loss": 0.6092, + "step": 5030 + }, + { + "epoch": 0.7073684210526315, + "grad_norm": 0.8748829364776611, + "learning_rate": 4.758741247322174e-05, + "loss": 0.7659, + "step": 5040 + }, + { + "epoch": 0.7087719298245614, + "grad_norm": 0.9573276042938232, + "learning_rate": 4.7577949445300004e-05, + "loss": 0.5774, + "step": 5050 + }, + { + "epoch": 0.7101754385964912, + "grad_norm": 0.9269713759422302, + "learning_rate": 4.756846883967077e-05, + "loss": 0.6234, + "step": 5060 + }, + { + "epoch": 0.7115789473684211, + "grad_norm": 0.6953681111335754, + "learning_rate": 4.755897066371502e-05, + "loss": 0.6456, + "step": 5070 + }, + { + "epoch": 0.7129824561403508, + "grad_norm": 0.6628289818763733, + "learning_rate": 4.754945492482741e-05, + "loss": 0.54, + "step": 5080 + }, + { + "epoch": 0.7143859649122807, + "grad_norm": 0.7972025871276855, + "learning_rate": 4.7539921630416264e-05, + "loss": 0.5695, + "step": 5090 + }, + { + "epoch": 0.7157894736842105, + "grad_norm": 1.032006859779358, + "learning_rate": 4.7530370787903576e-05, + "loss": 0.6748, + "step": 5100 + }, + { + "epoch": 0.7171929824561404, + "grad_norm": 0.7866501212120056, + "learning_rate": 4.7520802404725007e-05, + "loss": 0.595, + "step": 5110 + }, + { + "epoch": 0.7185964912280701, + "grad_norm": 1.0693832635879517, + "learning_rate": 4.751121648832987e-05, + "loss": 0.641, + "step": 5120 + }, + { + "epoch": 0.72, + "grad_norm": 1.0331542491912842, + "learning_rate": 4.750161304618114e-05, + "loss": 0.6345, + "step": 5130 + }, + { + "epoch": 0.7214035087719298, + "grad_norm": 0.6064502000808716, + "learning_rate": 4.749199208575541e-05, + "loss": 0.5997, + "step": 5140 + }, + { + "epoch": 0.7228070175438597, + "grad_norm": 1.1691397428512573, + "learning_rate": 4.748235361454293e-05, + "loss": 0.6529, + "step": 5150 + }, + { + "epoch": 0.7242105263157895, + "grad_norm": 0.7956925630569458, + "learning_rate": 4.7472697640047594e-05, + "loss": 0.5668, + "step": 5160 + }, + { + "epoch": 0.7256140350877193, + "grad_norm": 0.9746783971786499, + "learning_rate": 4.7463024169786895e-05, + "loss": 0.6433, + "step": 5170 + }, + { + "epoch": 0.7270175438596491, + "grad_norm": 1.2105709314346313, + "learning_rate": 4.745333321129197e-05, + "loss": 0.6749, + "step": 5180 + }, + { + "epoch": 0.728421052631579, + "grad_norm": 0.7860882878303528, + "learning_rate": 4.744362477210755e-05, + "loss": 0.7041, + "step": 5190 + }, + { + "epoch": 0.7298245614035088, + "grad_norm": 1.1629239320755005, + "learning_rate": 4.7433898859792e-05, + "loss": 0.5598, + "step": 5200 + }, + { + "epoch": 0.7312280701754386, + "grad_norm": 1.1319113969802856, + "learning_rate": 4.742415548191728e-05, + "loss": 0.6433, + "step": 5210 + }, + { + "epoch": 0.7326315789473684, + "grad_norm": 0.8640940189361572, + "learning_rate": 4.741439464606893e-05, + "loss": 0.6715, + "step": 5220 + }, + { + "epoch": 0.7340350877192983, + "grad_norm": 0.7730684280395508, + "learning_rate": 4.740461635984609e-05, + "loss": 0.6391, + "step": 5230 + }, + { + "epoch": 0.7354385964912281, + "grad_norm": 1.2042145729064941, + "learning_rate": 4.739482063086152e-05, + "loss": 0.5834, + "step": 5240 + }, + { + "epoch": 0.7368421052631579, + "grad_norm": 0.9434259533882141, + "learning_rate": 4.73850074667415e-05, + "loss": 0.7835, + "step": 5250 + }, + { + "epoch": 0.7382456140350877, + "grad_norm": 0.8331650495529175, + "learning_rate": 4.737517687512593e-05, + "loss": 0.6128, + "step": 5260 + }, + { + "epoch": 0.7396491228070176, + "grad_norm": 0.8063735365867615, + "learning_rate": 4.7365328863668256e-05, + "loss": 0.655, + "step": 5270 + }, + { + "epoch": 0.7410526315789474, + "grad_norm": 0.6377186179161072, + "learning_rate": 4.735546344003551e-05, + "loss": 0.6506, + "step": 5280 + }, + { + "epoch": 0.7424561403508771, + "grad_norm": 0.8817654252052307, + "learning_rate": 4.734558061190824e-05, + "loss": 0.6984, + "step": 5290 + }, + { + "epoch": 0.743859649122807, + "grad_norm": 1.2554540634155273, + "learning_rate": 4.733568038698057e-05, + "loss": 0.7401, + "step": 5300 + }, + { + "epoch": 0.7452631578947368, + "grad_norm": 1.0858135223388672, + "learning_rate": 4.732576277296017e-05, + "loss": 0.6432, + "step": 5310 + }, + { + "epoch": 0.7466666666666667, + "grad_norm": 1.1962653398513794, + "learning_rate": 4.731582777756825e-05, + "loss": 0.6687, + "step": 5320 + }, + { + "epoch": 0.7480701754385964, + "grad_norm": 1.0213031768798828, + "learning_rate": 4.730587540853954e-05, + "loss": 0.6489, + "step": 5330 + }, + { + "epoch": 0.7494736842105263, + "grad_norm": 0.8629382252693176, + "learning_rate": 4.729590567362228e-05, + "loss": 0.6149, + "step": 5340 + }, + { + "epoch": 0.7508771929824561, + "grad_norm": 0.6692180633544922, + "learning_rate": 4.728591858057827e-05, + "loss": 0.6227, + "step": 5350 + }, + { + "epoch": 0.752280701754386, + "grad_norm": 0.9368489980697632, + "learning_rate": 4.727591413718282e-05, + "loss": 0.712, + "step": 5360 + }, + { + "epoch": 0.7536842105263157, + "grad_norm": 1.1019880771636963, + "learning_rate": 4.7265892351224694e-05, + "loss": 0.7172, + "step": 5370 + }, + { + "epoch": 0.7550877192982456, + "grad_norm": 0.8168277144432068, + "learning_rate": 4.725585323050623e-05, + "loss": 0.6812, + "step": 5380 + }, + { + "epoch": 0.7564912280701754, + "grad_norm": 1.0383678674697876, + "learning_rate": 4.72457967828432e-05, + "loss": 0.6266, + "step": 5390 + }, + { + "epoch": 0.7578947368421053, + "grad_norm": 1.9418814182281494, + "learning_rate": 4.723572301606492e-05, + "loss": 0.6976, + "step": 5400 + }, + { + "epoch": 0.7592982456140351, + "grad_norm": 1.1380218267440796, + "learning_rate": 4.7225631938014134e-05, + "loss": 0.709, + "step": 5410 + }, + { + "epoch": 0.7607017543859649, + "grad_norm": 0.7876071333885193, + "learning_rate": 4.7215523556547116e-05, + "loss": 0.5956, + "step": 5420 + }, + { + "epoch": 0.7621052631578947, + "grad_norm": 0.9458256363868713, + "learning_rate": 4.720539787953357e-05, + "loss": 0.6943, + "step": 5430 + }, + { + "epoch": 0.7635087719298246, + "grad_norm": 0.6351762413978577, + "learning_rate": 4.71952549148567e-05, + "loss": 0.6322, + "step": 5440 + }, + { + "epoch": 0.7649122807017544, + "grad_norm": 0.8464050889015198, + "learning_rate": 4.7185094670413134e-05, + "loss": 0.6258, + "step": 5450 + }, + { + "epoch": 0.7663157894736842, + "grad_norm": 1.7159314155578613, + "learning_rate": 4.7174917154112984e-05, + "loss": 0.6347, + "step": 5460 + }, + { + "epoch": 0.767719298245614, + "grad_norm": 0.8159227967262268, + "learning_rate": 4.716472237387979e-05, + "loss": 0.6423, + "step": 5470 + }, + { + "epoch": 0.7691228070175439, + "grad_norm": 1.1517149209976196, + "learning_rate": 4.715451033765054e-05, + "loss": 0.6614, + "step": 5480 + }, + { + "epoch": 0.7705263157894737, + "grad_norm": 1.164534091949463, + "learning_rate": 4.714428105337565e-05, + "loss": 0.6326, + "step": 5490 + }, + { + "epoch": 0.7719298245614035, + "grad_norm": 1.0906124114990234, + "learning_rate": 4.713403452901898e-05, + "loss": 0.6146, + "step": 5500 + }, + { + "epoch": 0.7733333333333333, + "grad_norm": 0.7224928140640259, + "learning_rate": 4.7123770772557774e-05, + "loss": 0.6061, + "step": 5510 + }, + { + "epoch": 0.7747368421052632, + "grad_norm": 1.1344630718231201, + "learning_rate": 4.711348979198274e-05, + "loss": 0.7423, + "step": 5520 + }, + { + "epoch": 0.776140350877193, + "grad_norm": 1.0616703033447266, + "learning_rate": 4.710319159529798e-05, + "loss": 0.6648, + "step": 5530 + }, + { + "epoch": 0.7775438596491228, + "grad_norm": 0.8563722968101501, + "learning_rate": 4.709287619052098e-05, + "loss": 0.551, + "step": 5540 + }, + { + "epoch": 0.7789473684210526, + "grad_norm": 0.7541974186897278, + "learning_rate": 4.708254358568264e-05, + "loss": 0.7394, + "step": 5550 + }, + { + "epoch": 0.7803508771929825, + "grad_norm": 0.9201952815055847, + "learning_rate": 4.7072193788827236e-05, + "loss": 0.57, + "step": 5560 + }, + { + "epoch": 0.7817543859649123, + "grad_norm": 0.8615202307701111, + "learning_rate": 4.706182680801245e-05, + "loss": 0.6293, + "step": 5570 + }, + { + "epoch": 0.783157894736842, + "grad_norm": 0.8534351587295532, + "learning_rate": 4.705144265130934e-05, + "loss": 0.6007, + "step": 5580 + }, + { + "epoch": 0.7845614035087719, + "grad_norm": 0.8691478967666626, + "learning_rate": 4.704104132680231e-05, + "loss": 0.5963, + "step": 5590 + }, + { + "epoch": 0.7859649122807018, + "grad_norm": 1.209688663482666, + "learning_rate": 4.703062284258916e-05, + "loss": 0.7237, + "step": 5600 + }, + { + "epoch": 0.7873684210526316, + "grad_norm": 0.72704017162323, + "learning_rate": 4.702018720678103e-05, + "loss": 0.6452, + "step": 5610 + }, + { + "epoch": 0.7887719298245615, + "grad_norm": 1.3118873834609985, + "learning_rate": 4.7009734427502426e-05, + "loss": 0.6291, + "step": 5620 + }, + { + "epoch": 0.7901754385964912, + "grad_norm": 0.6223419308662415, + "learning_rate": 4.699926451289119e-05, + "loss": 0.5925, + "step": 5630 + }, + { + "epoch": 0.791578947368421, + "grad_norm": 1.0733870267868042, + "learning_rate": 4.698877747109852e-05, + "loss": 0.7342, + "step": 5640 + }, + { + "epoch": 0.7929824561403509, + "grad_norm": 0.7960459589958191, + "learning_rate": 4.697827331028893e-05, + "loss": 0.644, + "step": 5650 + }, + { + "epoch": 0.7943859649122808, + "grad_norm": 0.9189769625663757, + "learning_rate": 4.6967752038640264e-05, + "loss": 0.6567, + "step": 5660 + }, + { + "epoch": 0.7957894736842105, + "grad_norm": 1.1323273181915283, + "learning_rate": 4.695721366434369e-05, + "loss": 0.6873, + "step": 5670 + }, + { + "epoch": 0.7971929824561403, + "grad_norm": 0.8580273389816284, + "learning_rate": 4.694665819560371e-05, + "loss": 0.6733, + "step": 5680 + }, + { + "epoch": 0.7985964912280702, + "grad_norm": 1.3165494203567505, + "learning_rate": 4.693608564063811e-05, + "loss": 0.642, + "step": 5690 + }, + { + "epoch": 0.8, + "grad_norm": 0.7017198801040649, + "learning_rate": 4.692549600767798e-05, + "loss": 0.5438, + "step": 5700 + }, + { + "epoch": 0.8014035087719298, + "grad_norm": 0.8478591442108154, + "learning_rate": 4.6914889304967725e-05, + "loss": 0.6107, + "step": 5710 + }, + { + "epoch": 0.8028070175438596, + "grad_norm": 0.9716276526451111, + "learning_rate": 4.690426554076501e-05, + "loss": 0.5975, + "step": 5720 + }, + { + "epoch": 0.8042105263157895, + "grad_norm": 1.0631777048110962, + "learning_rate": 4.689362472334082e-05, + "loss": 0.6563, + "step": 5730 + }, + { + "epoch": 0.8056140350877193, + "grad_norm": 0.9736322164535522, + "learning_rate": 4.688296686097937e-05, + "loss": 0.6199, + "step": 5740 + }, + { + "epoch": 0.8070175438596491, + "grad_norm": 0.9049164652824402, + "learning_rate": 4.6872291961978195e-05, + "loss": 0.5772, + "step": 5750 + }, + { + "epoch": 0.8084210526315789, + "grad_norm": 1.2360827922821045, + "learning_rate": 4.6861600034648064e-05, + "loss": 0.6401, + "step": 5760 + }, + { + "epoch": 0.8098245614035088, + "grad_norm": 1.2036852836608887, + "learning_rate": 4.6850891087313e-05, + "loss": 0.6087, + "step": 5770 + }, + { + "epoch": 0.8112280701754386, + "grad_norm": 1.010108470916748, + "learning_rate": 4.6840165128310296e-05, + "loss": 0.6973, + "step": 5780 + }, + { + "epoch": 0.8126315789473684, + "grad_norm": 1.1753820180892944, + "learning_rate": 4.6829422165990475e-05, + "loss": 0.6509, + "step": 5790 + }, + { + "epoch": 0.8140350877192982, + "grad_norm": 1.0416866540908813, + "learning_rate": 4.6818662208717296e-05, + "loss": 0.6092, + "step": 5800 + }, + { + "epoch": 0.8154385964912281, + "grad_norm": 0.7539423108100891, + "learning_rate": 4.680788526486776e-05, + "loss": 0.5864, + "step": 5810 + }, + { + "epoch": 0.8168421052631579, + "grad_norm": 0.9227228164672852, + "learning_rate": 4.679709134283209e-05, + "loss": 0.5736, + "step": 5820 + }, + { + "epoch": 0.8182456140350877, + "grad_norm": 0.8869969844818115, + "learning_rate": 4.678628045101371e-05, + "loss": 0.5982, + "step": 5830 + }, + { + "epoch": 0.8196491228070175, + "grad_norm": 0.6802515387535095, + "learning_rate": 4.677545259782929e-05, + "loss": 0.6136, + "step": 5840 + }, + { + "epoch": 0.8210526315789474, + "grad_norm": 0.9065477848052979, + "learning_rate": 4.676460779170867e-05, + "loss": 0.6519, + "step": 5850 + }, + { + "epoch": 0.8224561403508772, + "grad_norm": 1.3136307001113892, + "learning_rate": 4.675374604109491e-05, + "loss": 0.6122, + "step": 5860 + }, + { + "epoch": 0.8238596491228071, + "grad_norm": 0.9648601412773132, + "learning_rate": 4.6742867354444256e-05, + "loss": 0.5582, + "step": 5870 + }, + { + "epoch": 0.8252631578947368, + "grad_norm": 1.085227370262146, + "learning_rate": 4.673197174022613e-05, + "loss": 0.6788, + "step": 5880 + }, + { + "epoch": 0.8266666666666667, + "grad_norm": 0.9866172075271606, + "learning_rate": 4.672105920692316e-05, + "loss": 0.647, + "step": 5890 + }, + { + "epoch": 0.8280701754385965, + "grad_norm": 1.5403311252593994, + "learning_rate": 4.6710129763031095e-05, + "loss": 0.6326, + "step": 5900 + }, + { + "epoch": 0.8294736842105264, + "grad_norm": 1.195082187652588, + "learning_rate": 4.669918341705891e-05, + "loss": 0.6205, + "step": 5910 + }, + { + "epoch": 0.8308771929824561, + "grad_norm": 0.9392557740211487, + "learning_rate": 4.66882201775287e-05, + "loss": 0.699, + "step": 5920 + }, + { + "epoch": 0.832280701754386, + "grad_norm": 1.280907392501831, + "learning_rate": 4.667724005297573e-05, + "loss": 0.6147, + "step": 5930 + }, + { + "epoch": 0.8336842105263158, + "grad_norm": 0.6876835823059082, + "learning_rate": 4.66662430519484e-05, + "loss": 0.5737, + "step": 5940 + }, + { + "epoch": 0.8350877192982457, + "grad_norm": 0.7067710161209106, + "learning_rate": 4.665522918300823e-05, + "loss": 0.6072, + "step": 5950 + }, + { + "epoch": 0.8364912280701754, + "grad_norm": 1.0336652994155884, + "learning_rate": 4.6644198454729933e-05, + "loss": 0.6296, + "step": 5960 + }, + { + "epoch": 0.8378947368421052, + "grad_norm": 1.3756647109985352, + "learning_rate": 4.663315087570128e-05, + "loss": 0.6489, + "step": 5970 + }, + { + "epoch": 0.8392982456140351, + "grad_norm": 1.0433988571166992, + "learning_rate": 4.662208645452321e-05, + "loss": 0.6742, + "step": 5980 + }, + { + "epoch": 0.840701754385965, + "grad_norm": 0.6354380249977112, + "learning_rate": 4.661100519980973e-05, + "loss": 0.573, + "step": 5990 + }, + { + "epoch": 0.8421052631578947, + "grad_norm": 1.112243890762329, + "learning_rate": 4.6599907120188005e-05, + "loss": 0.6455, + "step": 6000 + }, + { + "epoch": 0.8421052631578947, + "eval_loss": 0.6415141820907593, + "eval_runtime": 44.3465, + "eval_samples_per_second": 33.825, + "eval_steps_per_second": 8.456, + "step": 6000 + }, + { + "epoch": 0.8435087719298245, + "grad_norm": 0.781201958656311, + "learning_rate": 4.658879222429825e-05, + "loss": 0.5362, + "step": 6010 + }, + { + "epoch": 0.8449122807017544, + "grad_norm": 1.069032073020935, + "learning_rate": 4.65776605207938e-05, + "loss": 0.6321, + "step": 6020 + }, + { + "epoch": 0.8463157894736842, + "grad_norm": 1.0449451208114624, + "learning_rate": 4.656651201834106e-05, + "loss": 0.6208, + "step": 6030 + }, + { + "epoch": 0.847719298245614, + "grad_norm": 1.9674957990646362, + "learning_rate": 4.655534672561953e-05, + "loss": 0.6529, + "step": 6040 + }, + { + "epoch": 0.8491228070175438, + "grad_norm": 0.9335805773735046, + "learning_rate": 4.654416465132177e-05, + "loss": 0.6515, + "step": 6050 + }, + { + "epoch": 0.8505263157894737, + "grad_norm": 0.8951327800750732, + "learning_rate": 4.6532965804153416e-05, + "loss": 0.613, + "step": 6060 + }, + { + "epoch": 0.8519298245614035, + "grad_norm": 1.1679803133010864, + "learning_rate": 4.652175019283314e-05, + "loss": 0.6215, + "step": 6070 + }, + { + "epoch": 0.8533333333333334, + "grad_norm": 1.6771854162216187, + "learning_rate": 4.6510517826092695e-05, + "loss": 0.7427, + "step": 6080 + }, + { + "epoch": 0.8547368421052631, + "grad_norm": 0.6942294836044312, + "learning_rate": 4.649926871267685e-05, + "loss": 0.58, + "step": 6090 + }, + { + "epoch": 0.856140350877193, + "grad_norm": 1.114723801612854, + "learning_rate": 4.6488002861343425e-05, + "loss": 0.6916, + "step": 6100 + }, + { + "epoch": 0.8575438596491228, + "grad_norm": 0.9489352107048035, + "learning_rate": 4.647672028086328e-05, + "loss": 0.6073, + "step": 6110 + }, + { + "epoch": 0.8589473684210527, + "grad_norm": 0.8159108757972717, + "learning_rate": 4.646542098002029e-05, + "loss": 0.6273, + "step": 6120 + }, + { + "epoch": 0.8603508771929824, + "grad_norm": 1.2675360441207886, + "learning_rate": 4.645410496761135e-05, + "loss": 0.6657, + "step": 6130 + }, + { + "epoch": 0.8617543859649123, + "grad_norm": 0.9706358313560486, + "learning_rate": 4.644277225244635e-05, + "loss": 0.6861, + "step": 6140 + }, + { + "epoch": 0.8631578947368421, + "grad_norm": 1.0342049598693848, + "learning_rate": 4.6431422843348216e-05, + "loss": 0.6834, + "step": 6150 + }, + { + "epoch": 0.864561403508772, + "grad_norm": 0.9016236066818237, + "learning_rate": 4.642005674915284e-05, + "loss": 0.6098, + "step": 6160 + }, + { + "epoch": 0.8659649122807017, + "grad_norm": 0.8684419989585876, + "learning_rate": 4.640867397870912e-05, + "loss": 0.6831, + "step": 6170 + }, + { + "epoch": 0.8673684210526316, + "grad_norm": 0.8743478059768677, + "learning_rate": 4.639727454087892e-05, + "loss": 0.5846, + "step": 6180 + }, + { + "epoch": 0.8687719298245614, + "grad_norm": 1.0925372838974, + "learning_rate": 4.638585844453711e-05, + "loss": 0.6436, + "step": 6190 + }, + { + "epoch": 0.8701754385964913, + "grad_norm": 1.0224460363388062, + "learning_rate": 4.6374425698571514e-05, + "loss": 0.7538, + "step": 6200 + }, + { + "epoch": 0.871578947368421, + "grad_norm": 0.8540046215057373, + "learning_rate": 4.63629763118829e-05, + "loss": 0.596, + "step": 6210 + }, + { + "epoch": 0.8729824561403509, + "grad_norm": 0.9685525298118591, + "learning_rate": 4.6351510293385026e-05, + "loss": 0.5844, + "step": 6220 + }, + { + "epoch": 0.8743859649122807, + "grad_norm": 0.9988105893135071, + "learning_rate": 4.634002765200456e-05, + "loss": 0.5785, + "step": 6230 + }, + { + "epoch": 0.8757894736842106, + "grad_norm": 0.7331526279449463, + "learning_rate": 4.632852839668115e-05, + "loss": 0.5728, + "step": 6240 + }, + { + "epoch": 0.8771929824561403, + "grad_norm": 1.0520068407058716, + "learning_rate": 4.6317012536367354e-05, + "loss": 0.6317, + "step": 6250 + }, + { + "epoch": 0.8785964912280702, + "grad_norm": 1.117604374885559, + "learning_rate": 4.630548008002866e-05, + "loss": 0.6152, + "step": 6260 + }, + { + "epoch": 0.88, + "grad_norm": 0.7635726928710938, + "learning_rate": 4.629393103664349e-05, + "loss": 0.64, + "step": 6270 + }, + { + "epoch": 0.8814035087719299, + "grad_norm": 1.1754323244094849, + "learning_rate": 4.6282365415203164e-05, + "loss": 0.5923, + "step": 6280 + }, + { + "epoch": 0.8828070175438596, + "grad_norm": 0.6220813989639282, + "learning_rate": 4.627078322471191e-05, + "loss": 0.6745, + "step": 6290 + }, + { + "epoch": 0.8842105263157894, + "grad_norm": 0.8440349698066711, + "learning_rate": 4.625918447418687e-05, + "loss": 0.5819, + "step": 6300 + }, + { + "epoch": 0.8856140350877193, + "grad_norm": 1.0416796207427979, + "learning_rate": 4.624756917265807e-05, + "loss": 0.5965, + "step": 6310 + }, + { + "epoch": 0.8870175438596491, + "grad_norm": 1.1395715475082397, + "learning_rate": 4.62359373291684e-05, + "loss": 0.5838, + "step": 6320 + }, + { + "epoch": 0.888421052631579, + "grad_norm": 1.3868945837020874, + "learning_rate": 4.622428895277367e-05, + "loss": 0.7304, + "step": 6330 + }, + { + "epoch": 0.8898245614035087, + "grad_norm": 1.0000405311584473, + "learning_rate": 4.621262405254253e-05, + "loss": 0.5938, + "step": 6340 + }, + { + "epoch": 0.8912280701754386, + "grad_norm": 0.754399836063385, + "learning_rate": 4.620094263755652e-05, + "loss": 0.6276, + "step": 6350 + }, + { + "epoch": 0.8926315789473684, + "grad_norm": 0.9784127473831177, + "learning_rate": 4.618924471691e-05, + "loss": 0.613, + "step": 6360 + }, + { + "epoch": 0.8940350877192983, + "grad_norm": 0.6419925689697266, + "learning_rate": 4.617753029971021e-05, + "loss": 0.599, + "step": 6370 + }, + { + "epoch": 0.895438596491228, + "grad_norm": 1.2562180757522583, + "learning_rate": 4.6165799395077236e-05, + "loss": 0.6358, + "step": 6380 + }, + { + "epoch": 0.8968421052631579, + "grad_norm": 1.1815166473388672, + "learning_rate": 4.615405201214398e-05, + "loss": 0.6747, + "step": 6390 + }, + { + "epoch": 0.8982456140350877, + "grad_norm": 1.5243850946426392, + "learning_rate": 4.614228816005618e-05, + "loss": 0.6082, + "step": 6400 + }, + { + "epoch": 0.8996491228070176, + "grad_norm": 0.894396960735321, + "learning_rate": 4.61305078479724e-05, + "loss": 0.5506, + "step": 6410 + }, + { + "epoch": 0.9010526315789473, + "grad_norm": 0.7782644629478455, + "learning_rate": 4.611871108506403e-05, + "loss": 0.5816, + "step": 6420 + }, + { + "epoch": 0.9024561403508772, + "grad_norm": 1.2209144830703735, + "learning_rate": 4.610689788051523e-05, + "loss": 0.6178, + "step": 6430 + }, + { + "epoch": 0.903859649122807, + "grad_norm": 0.8224475979804993, + "learning_rate": 4.6095068243523e-05, + "loss": 0.644, + "step": 6440 + }, + { + "epoch": 0.9052631578947369, + "grad_norm": 1.054763674736023, + "learning_rate": 4.608322218329711e-05, + "loss": 0.5564, + "step": 6450 + }, + { + "epoch": 0.9066666666666666, + "grad_norm": 0.9225585460662842, + "learning_rate": 4.607135970906014e-05, + "loss": 0.6534, + "step": 6460 + }, + { + "epoch": 0.9080701754385965, + "grad_norm": 0.7979352474212646, + "learning_rate": 4.605948083004741e-05, + "loss": 0.6671, + "step": 6470 + }, + { + "epoch": 0.9094736842105263, + "grad_norm": 1.223375916481018, + "learning_rate": 4.6047585555507045e-05, + "loss": 0.6996, + "step": 6480 + }, + { + "epoch": 0.9108771929824562, + "grad_norm": 0.803092360496521, + "learning_rate": 4.603567389469993e-05, + "loss": 0.5937, + "step": 6490 + }, + { + "epoch": 0.9122807017543859, + "grad_norm": 0.9455748796463013, + "learning_rate": 4.60237458568997e-05, + "loss": 0.555, + "step": 6500 + }, + { + "epoch": 0.9136842105263158, + "grad_norm": 1.0315808057785034, + "learning_rate": 4.6011801451392736e-05, + "loss": 0.6721, + "step": 6510 + }, + { + "epoch": 0.9150877192982456, + "grad_norm": 1.0508462190628052, + "learning_rate": 4.5999840687478167e-05, + "loss": 0.5849, + "step": 6520 + }, + { + "epoch": 0.9164912280701755, + "grad_norm": 0.8299797773361206, + "learning_rate": 4.598786357446786e-05, + "loss": 0.6013, + "step": 6530 + }, + { + "epoch": 0.9178947368421052, + "grad_norm": 0.8177257776260376, + "learning_rate": 4.5975870121686406e-05, + "loss": 0.6178, + "step": 6540 + }, + { + "epoch": 0.9192982456140351, + "grad_norm": 0.8297099471092224, + "learning_rate": 4.596386033847111e-05, + "loss": 0.5985, + "step": 6550 + }, + { + "epoch": 0.9207017543859649, + "grad_norm": 1.1290909051895142, + "learning_rate": 4.5951834234172025e-05, + "loss": 0.5878, + "step": 6560 + }, + { + "epoch": 0.9221052631578948, + "grad_norm": 0.7982479929924011, + "learning_rate": 4.593979181815187e-05, + "loss": 0.7004, + "step": 6570 + }, + { + "epoch": 0.9235087719298246, + "grad_norm": 0.9358506202697754, + "learning_rate": 4.5927733099786066e-05, + "loss": 0.615, + "step": 6580 + }, + { + "epoch": 0.9249122807017544, + "grad_norm": 0.8648248910903931, + "learning_rate": 4.591565808846276e-05, + "loss": 0.5789, + "step": 6590 + }, + { + "epoch": 0.9263157894736842, + "grad_norm": 1.1170403957366943, + "learning_rate": 4.5903566793582755e-05, + "loss": 0.5827, + "step": 6600 + }, + { + "epoch": 0.927719298245614, + "grad_norm": 1.5100983381271362, + "learning_rate": 4.589145922455954e-05, + "loss": 0.6176, + "step": 6610 + }, + { + "epoch": 0.9291228070175439, + "grad_norm": 1.2393382787704468, + "learning_rate": 4.587933539081927e-05, + "loss": 0.7056, + "step": 6620 + }, + { + "epoch": 0.9305263157894736, + "grad_norm": 0.8480477333068848, + "learning_rate": 4.586719530180075e-05, + "loss": 0.6418, + "step": 6630 + }, + { + "epoch": 0.9319298245614035, + "grad_norm": 1.1262218952178955, + "learning_rate": 4.585503896695549e-05, + "loss": 0.5231, + "step": 6640 + }, + { + "epoch": 0.9333333333333333, + "grad_norm": 1.1744680404663086, + "learning_rate": 4.584286639574758e-05, + "loss": 0.7463, + "step": 6650 + }, + { + "epoch": 0.9347368421052632, + "grad_norm": 1.0590097904205322, + "learning_rate": 4.58306775976538e-05, + "loss": 0.6373, + "step": 6660 + }, + { + "epoch": 0.9361403508771929, + "grad_norm": 1.1323457956314087, + "learning_rate": 4.581847258216355e-05, + "loss": 0.6609, + "step": 6670 + }, + { + "epoch": 0.9375438596491228, + "grad_norm": 1.043113350868225, + "learning_rate": 4.580625135877884e-05, + "loss": 0.6366, + "step": 6680 + }, + { + "epoch": 0.9389473684210526, + "grad_norm": 0.6503088474273682, + "learning_rate": 4.5794013937014326e-05, + "loss": 0.6359, + "step": 6690 + }, + { + "epoch": 0.9403508771929825, + "grad_norm": 1.6966040134429932, + "learning_rate": 4.578176032639724e-05, + "loss": 0.7239, + "step": 6700 + }, + { + "epoch": 0.9417543859649122, + "grad_norm": 0.7537420988082886, + "learning_rate": 4.5769490536467465e-05, + "loss": 0.5848, + "step": 6710 + }, + { + "epoch": 0.9431578947368421, + "grad_norm": 1.1561657190322876, + "learning_rate": 4.5757204576777437e-05, + "loss": 0.6155, + "step": 6720 + }, + { + "epoch": 0.9445614035087719, + "grad_norm": 0.6912992000579834, + "learning_rate": 4.574490245689219e-05, + "loss": 0.5817, + "step": 6730 + }, + { + "epoch": 0.9459649122807018, + "grad_norm": 1.0430986881256104, + "learning_rate": 4.573258418638936e-05, + "loss": 0.4903, + "step": 6740 + }, + { + "epoch": 0.9473684210526315, + "grad_norm": 0.9669928550720215, + "learning_rate": 4.572024977485914e-05, + "loss": 0.6289, + "step": 6750 + }, + { + "epoch": 0.9487719298245614, + "grad_norm": 0.9165218472480774, + "learning_rate": 4.5707899231904286e-05, + "loss": 0.6133, + "step": 6760 + }, + { + "epoch": 0.9501754385964912, + "grad_norm": 0.9709174036979675, + "learning_rate": 4.569553256714012e-05, + "loss": 0.638, + "step": 6770 + }, + { + "epoch": 0.9515789473684211, + "grad_norm": 1.2939212322235107, + "learning_rate": 4.5683149790194526e-05, + "loss": 0.7599, + "step": 6780 + }, + { + "epoch": 0.9529824561403509, + "grad_norm": 1.0535619258880615, + "learning_rate": 4.5670750910707903e-05, + "loss": 0.6906, + "step": 6790 + }, + { + "epoch": 0.9543859649122807, + "grad_norm": 0.9928086400032043, + "learning_rate": 4.565833593833321e-05, + "loss": 0.7387, + "step": 6800 + }, + { + "epoch": 0.9557894736842105, + "grad_norm": 0.7087190747261047, + "learning_rate": 4.5645904882735935e-05, + "loss": 0.5566, + "step": 6810 + }, + { + "epoch": 0.9571929824561404, + "grad_norm": 1.2111977338790894, + "learning_rate": 4.563345775359408e-05, + "loss": 0.5748, + "step": 6820 + }, + { + "epoch": 0.9585964912280702, + "grad_norm": 1.2516281604766846, + "learning_rate": 4.562099456059815e-05, + "loss": 0.6256, + "step": 6830 + }, + { + "epoch": 0.96, + "grad_norm": 0.8081939220428467, + "learning_rate": 4.5608515313451186e-05, + "loss": 0.5826, + "step": 6840 + }, + { + "epoch": 0.9614035087719298, + "grad_norm": 1.453393578529358, + "learning_rate": 4.559602002186869e-05, + "loss": 0.5538, + "step": 6850 + }, + { + "epoch": 0.9628070175438597, + "grad_norm": 1.139618158340454, + "learning_rate": 4.558350869557868e-05, + "loss": 0.6514, + "step": 6860 + }, + { + "epoch": 0.9642105263157895, + "grad_norm": 0.9846227765083313, + "learning_rate": 4.557098134432167e-05, + "loss": 0.7813, + "step": 6870 + }, + { + "epoch": 0.9656140350877193, + "grad_norm": 0.8734840750694275, + "learning_rate": 4.555843797785061e-05, + "loss": 0.5993, + "step": 6880 + }, + { + "epoch": 0.9670175438596491, + "grad_norm": 1.172455072402954, + "learning_rate": 4.554587860593095e-05, + "loss": 0.5594, + "step": 6890 + }, + { + "epoch": 0.968421052631579, + "grad_norm": 0.5644223690032959, + "learning_rate": 4.553330323834059e-05, + "loss": 0.5581, + "step": 6900 + }, + { + "epoch": 0.9698245614035088, + "grad_norm": 0.7265453338623047, + "learning_rate": 4.552071188486989e-05, + "loss": 0.6295, + "step": 6910 + }, + { + "epoch": 0.9712280701754386, + "grad_norm": 0.8341143727302551, + "learning_rate": 4.550810455532164e-05, + "loss": 0.5697, + "step": 6920 + }, + { + "epoch": 0.9726315789473684, + "grad_norm": 0.7036447525024414, + "learning_rate": 4.5495481259511095e-05, + "loss": 0.5933, + "step": 6930 + }, + { + "epoch": 0.9740350877192983, + "grad_norm": 0.972158670425415, + "learning_rate": 4.54828420072659e-05, + "loss": 0.5723, + "step": 6940 + }, + { + "epoch": 0.9754385964912281, + "grad_norm": 1.3979262113571167, + "learning_rate": 4.547018680842616e-05, + "loss": 0.5749, + "step": 6950 + }, + { + "epoch": 0.9768421052631578, + "grad_norm": 1.3824502229690552, + "learning_rate": 4.545751567284439e-05, + "loss": 0.7053, + "step": 6960 + }, + { + "epoch": 0.9782456140350877, + "grad_norm": 1.1198428869247437, + "learning_rate": 4.5444828610385486e-05, + "loss": 0.629, + "step": 6970 + }, + { + "epoch": 0.9796491228070175, + "grad_norm": 0.7075201869010925, + "learning_rate": 4.543212563092677e-05, + "loss": 0.6647, + "step": 6980 + }, + { + "epoch": 0.9810526315789474, + "grad_norm": 1.0392223596572876, + "learning_rate": 4.541940674435794e-05, + "loss": 0.6921, + "step": 6990 + }, + { + "epoch": 0.9824561403508771, + "grad_norm": 1.027004599571228, + "learning_rate": 4.5406671960581096e-05, + "loss": 0.6187, + "step": 7000 + }, + { + "epoch": 0.983859649122807, + "grad_norm": 1.2175973653793335, + "learning_rate": 4.53939212895107e-05, + "loss": 0.6471, + "step": 7010 + }, + { + "epoch": 0.9852631578947368, + "grad_norm": 1.0904464721679688, + "learning_rate": 4.538115474107357e-05, + "loss": 0.5916, + "step": 7020 + }, + { + "epoch": 0.9866666666666667, + "grad_norm": 1.0575454235076904, + "learning_rate": 4.536837232520893e-05, + "loss": 0.6859, + "step": 7030 + }, + { + "epoch": 0.9880701754385965, + "grad_norm": 0.7100856304168701, + "learning_rate": 4.535557405186831e-05, + "loss": 0.64, + "step": 7040 + }, + { + "epoch": 0.9894736842105263, + "grad_norm": 0.9754576683044434, + "learning_rate": 4.534275993101561e-05, + "loss": 0.5839, + "step": 7050 + }, + { + "epoch": 0.9908771929824561, + "grad_norm": 0.8776949644088745, + "learning_rate": 4.532992997262706e-05, + "loss": 0.577, + "step": 7060 + }, + { + "epoch": 0.992280701754386, + "grad_norm": 1.569716215133667, + "learning_rate": 4.531708418669122e-05, + "loss": 0.7155, + "step": 7070 + }, + { + "epoch": 0.9936842105263158, + "grad_norm": 0.7174299955368042, + "learning_rate": 4.5304222583208983e-05, + "loss": 0.6713, + "step": 7080 + }, + { + "epoch": 0.9950877192982456, + "grad_norm": 1.0695897340774536, + "learning_rate": 4.5291345172193546e-05, + "loss": 0.6528, + "step": 7090 + }, + { + "epoch": 0.9964912280701754, + "grad_norm": 1.0685267448425293, + "learning_rate": 4.5278451963670403e-05, + "loss": 0.5705, + "step": 7100 + }, + { + "epoch": 0.9978947368421053, + "grad_norm": 1.2662453651428223, + "learning_rate": 4.526554296767738e-05, + "loss": 0.6763, + "step": 7110 + }, + { + "epoch": 0.9992982456140351, + "grad_norm": 1.1944515705108643, + "learning_rate": 4.525261819426455e-05, + "loss": 0.5855, + "step": 7120 + }, + { + "epoch": 1.0007017543859649, + "grad_norm": 0.7510038614273071, + "learning_rate": 4.5239677653494305e-05, + "loss": 0.5631, + "step": 7130 + }, + { + "epoch": 1.0021052631578948, + "grad_norm": 0.7383008003234863, + "learning_rate": 4.5226721355441306e-05, + "loss": 0.493, + "step": 7140 + }, + { + "epoch": 1.0035087719298246, + "grad_norm": 1.3719711303710938, + "learning_rate": 4.5213749310192455e-05, + "loss": 0.5049, + "step": 7150 + }, + { + "epoch": 1.0049122807017543, + "grad_norm": 0.7755573987960815, + "learning_rate": 4.520076152784695e-05, + "loss": 0.5204, + "step": 7160 + }, + { + "epoch": 1.0063157894736843, + "grad_norm": 1.0142574310302734, + "learning_rate": 4.518775801851622e-05, + "loss": 0.5395, + "step": 7170 + }, + { + "epoch": 1.007719298245614, + "grad_norm": 1.0278340578079224, + "learning_rate": 4.517473879232395e-05, + "loss": 0.5231, + "step": 7180 + }, + { + "epoch": 1.0091228070175438, + "grad_norm": 1.4633328914642334, + "learning_rate": 4.516170385940603e-05, + "loss": 0.5764, + "step": 7190 + }, + { + "epoch": 1.0105263157894737, + "grad_norm": 1.5323199033737183, + "learning_rate": 4.514865322991063e-05, + "loss": 0.5339, + "step": 7200 + }, + { + "epoch": 1.0119298245614035, + "grad_norm": 1.379055380821228, + "learning_rate": 4.51355869139981e-05, + "loss": 0.5684, + "step": 7210 + }, + { + "epoch": 1.0133333333333334, + "grad_norm": 0.9581233859062195, + "learning_rate": 4.512250492184101e-05, + "loss": 0.539, + "step": 7220 + }, + { + "epoch": 1.0147368421052632, + "grad_norm": 1.1807743310928345, + "learning_rate": 4.510940726362416e-05, + "loss": 0.5348, + "step": 7230 + }, + { + "epoch": 1.016140350877193, + "grad_norm": 1.2164653539657593, + "learning_rate": 4.50962939495445e-05, + "loss": 0.5795, + "step": 7240 + }, + { + "epoch": 1.0175438596491229, + "grad_norm": 0.7895627617835999, + "learning_rate": 4.50831649898112e-05, + "loss": 0.5894, + "step": 7250 + }, + { + "epoch": 1.0189473684210526, + "grad_norm": 1.4003938436508179, + "learning_rate": 4.507002039464562e-05, + "loss": 0.5825, + "step": 7260 + }, + { + "epoch": 1.0203508771929826, + "grad_norm": 0.8824437856674194, + "learning_rate": 4.505686017428127e-05, + "loss": 0.5513, + "step": 7270 + }, + { + "epoch": 1.0217543859649123, + "grad_norm": 1.0241267681121826, + "learning_rate": 4.504368433896382e-05, + "loss": 0.6675, + "step": 7280 + }, + { + "epoch": 1.023157894736842, + "grad_norm": 1.1416174173355103, + "learning_rate": 4.5030492898951134e-05, + "loss": 0.5146, + "step": 7290 + }, + { + "epoch": 1.024561403508772, + "grad_norm": 1.4306304454803467, + "learning_rate": 4.501728586451318e-05, + "loss": 0.6254, + "step": 7300 + }, + { + "epoch": 1.0259649122807017, + "grad_norm": 0.7919867634773254, + "learning_rate": 4.5004063245932097e-05, + "loss": 0.4688, + "step": 7310 + }, + { + "epoch": 1.0273684210526315, + "grad_norm": 1.0270862579345703, + "learning_rate": 4.4990825053502136e-05, + "loss": 0.5227, + "step": 7320 + }, + { + "epoch": 1.0287719298245614, + "grad_norm": 1.332261085510254, + "learning_rate": 4.497757129752969e-05, + "loss": 0.5219, + "step": 7330 + }, + { + "epoch": 1.0301754385964912, + "grad_norm": 1.1045178174972534, + "learning_rate": 4.496430198833327e-05, + "loss": 0.5006, + "step": 7340 + }, + { + "epoch": 1.0315789473684212, + "grad_norm": 1.069557547569275, + "learning_rate": 4.495101713624348e-05, + "loss": 0.519, + "step": 7350 + }, + { + "epoch": 1.032982456140351, + "grad_norm": 1.0084444284439087, + "learning_rate": 4.493771675160303e-05, + "loss": 0.6042, + "step": 7360 + }, + { + "epoch": 1.0343859649122806, + "grad_norm": 1.1198923587799072, + "learning_rate": 4.4924400844766734e-05, + "loss": 0.5092, + "step": 7370 + }, + { + "epoch": 1.0357894736842106, + "grad_norm": 0.7310131788253784, + "learning_rate": 4.491106942610147e-05, + "loss": 0.6178, + "step": 7380 + }, + { + "epoch": 1.0371929824561403, + "grad_norm": 1.1642857789993286, + "learning_rate": 4.489772250598622e-05, + "loss": 0.6226, + "step": 7390 + }, + { + "epoch": 1.03859649122807, + "grad_norm": 1.194806456565857, + "learning_rate": 4.488436009481201e-05, + "loss": 0.5821, + "step": 7400 + }, + { + "epoch": 1.04, + "grad_norm": 1.3842540979385376, + "learning_rate": 4.487098220298193e-05, + "loss": 0.5265, + "step": 7410 + }, + { + "epoch": 1.0414035087719298, + "grad_norm": 1.546705722808838, + "learning_rate": 4.485758884091113e-05, + "loss": 0.5617, + "step": 7420 + }, + { + "epoch": 1.0428070175438597, + "grad_norm": 1.5181100368499756, + "learning_rate": 4.4844180019026805e-05, + "loss": 0.5468, + "step": 7430 + }, + { + "epoch": 1.0442105263157895, + "grad_norm": 1.8832321166992188, + "learning_rate": 4.483075574776819e-05, + "loss": 0.5048, + "step": 7440 + }, + { + "epoch": 1.0456140350877192, + "grad_norm": 1.2133930921554565, + "learning_rate": 4.4817316037586524e-05, + "loss": 0.5684, + "step": 7450 + }, + { + "epoch": 1.0470175438596492, + "grad_norm": 1.6424169540405273, + "learning_rate": 4.480386089894509e-05, + "loss": 0.5851, + "step": 7460 + }, + { + "epoch": 1.048421052631579, + "grad_norm": 1.2695761919021606, + "learning_rate": 4.479039034231918e-05, + "loss": 0.5308, + "step": 7470 + }, + { + "epoch": 1.0498245614035087, + "grad_norm": 1.3531373739242554, + "learning_rate": 4.477690437819607e-05, + "loss": 0.5904, + "step": 7480 + }, + { + "epoch": 1.0512280701754386, + "grad_norm": 1.6086102724075317, + "learning_rate": 4.476340301707507e-05, + "loss": 0.4894, + "step": 7490 + }, + { + "epoch": 1.0526315789473684, + "grad_norm": 0.7145791053771973, + "learning_rate": 4.4749886269467416e-05, + "loss": 0.4906, + "step": 7500 + }, + { + "epoch": 1.0540350877192983, + "grad_norm": 1.5852210521697998, + "learning_rate": 4.473635414589639e-05, + "loss": 0.5399, + "step": 7510 + }, + { + "epoch": 1.055438596491228, + "grad_norm": 1.3225674629211426, + "learning_rate": 4.47228066568972e-05, + "loss": 0.5168, + "step": 7520 + }, + { + "epoch": 1.0568421052631578, + "grad_norm": 1.3213186264038086, + "learning_rate": 4.470924381301704e-05, + "loss": 0.4888, + "step": 7530 + }, + { + "epoch": 1.0582456140350878, + "grad_norm": 1.4983114004135132, + "learning_rate": 4.469566562481503e-05, + "loss": 0.4909, + "step": 7540 + }, + { + "epoch": 1.0596491228070175, + "grad_norm": 1.3175050020217896, + "learning_rate": 4.4682072102862286e-05, + "loss": 0.5369, + "step": 7550 + }, + { + "epoch": 1.0610526315789475, + "grad_norm": 1.14377760887146, + "learning_rate": 4.466846325774179e-05, + "loss": 0.5046, + "step": 7560 + }, + { + "epoch": 1.0624561403508772, + "grad_norm": 0.6691097021102905, + "learning_rate": 4.4654839100048535e-05, + "loss": 0.5201, + "step": 7570 + }, + { + "epoch": 1.063859649122807, + "grad_norm": 1.4467300176620483, + "learning_rate": 4.464119964038937e-05, + "loss": 0.5238, + "step": 7580 + }, + { + "epoch": 1.065263157894737, + "grad_norm": 0.8880655169487, + "learning_rate": 4.462754488938309e-05, + "loss": 0.5074, + "step": 7590 + }, + { + "epoch": 1.0666666666666667, + "grad_norm": 2.043294906616211, + "learning_rate": 4.4613874857660384e-05, + "loss": 0.5297, + "step": 7600 + }, + { + "epoch": 1.0680701754385964, + "grad_norm": 1.054681420326233, + "learning_rate": 4.460018955586384e-05, + "loss": 0.5585, + "step": 7610 + }, + { + "epoch": 1.0694736842105264, + "grad_norm": 1.256369709968567, + "learning_rate": 4.458648899464793e-05, + "loss": 0.4944, + "step": 7620 + }, + { + "epoch": 1.070877192982456, + "grad_norm": 1.0441490411758423, + "learning_rate": 4.457277318467903e-05, + "loss": 0.5736, + "step": 7630 + }, + { + "epoch": 1.072280701754386, + "grad_norm": 0.885286271572113, + "learning_rate": 4.4559042136635345e-05, + "loss": 0.6152, + "step": 7640 + }, + { + "epoch": 1.0736842105263158, + "grad_norm": 1.8804951906204224, + "learning_rate": 4.4545295861206975e-05, + "loss": 0.4936, + "step": 7650 + }, + { + "epoch": 1.0750877192982455, + "grad_norm": 1.5045465230941772, + "learning_rate": 4.453153436909587e-05, + "loss": 0.5547, + "step": 7660 + }, + { + "epoch": 1.0764912280701755, + "grad_norm": 1.7368062734603882, + "learning_rate": 4.4517757671015826e-05, + "loss": 0.537, + "step": 7670 + }, + { + "epoch": 1.0778947368421052, + "grad_norm": 1.3677830696105957, + "learning_rate": 4.4503965777692456e-05, + "loss": 0.5131, + "step": 7680 + }, + { + "epoch": 1.079298245614035, + "grad_norm": 1.2926596403121948, + "learning_rate": 4.449015869986325e-05, + "loss": 0.4782, + "step": 7690 + }, + { + "epoch": 1.080701754385965, + "grad_norm": 2.191722869873047, + "learning_rate": 4.447633644827747e-05, + "loss": 0.4962, + "step": 7700 + }, + { + "epoch": 1.0821052631578947, + "grad_norm": 1.8317209482192993, + "learning_rate": 4.446249903369621e-05, + "loss": 0.5025, + "step": 7710 + }, + { + "epoch": 1.0835087719298246, + "grad_norm": 1.2881171703338623, + "learning_rate": 4.444864646689239e-05, + "loss": 0.4816, + "step": 7720 + }, + { + "epoch": 1.0849122807017544, + "grad_norm": 1.1918405294418335, + "learning_rate": 4.443477875865071e-05, + "loss": 0.4762, + "step": 7730 + }, + { + "epoch": 1.0863157894736841, + "grad_norm": 1.1728036403656006, + "learning_rate": 4.4420895919767626e-05, + "loss": 0.4501, + "step": 7740 + }, + { + "epoch": 1.087719298245614, + "grad_norm": 1.505370855331421, + "learning_rate": 4.440699796105143e-05, + "loss": 0.4855, + "step": 7750 + }, + { + "epoch": 1.0891228070175438, + "grad_norm": 1.6580755710601807, + "learning_rate": 4.439308489332215e-05, + "loss": 0.5558, + "step": 7760 + }, + { + "epoch": 1.0905263157894738, + "grad_norm": 1.496596097946167, + "learning_rate": 4.437915672741158e-05, + "loss": 0.5219, + "step": 7770 + }, + { + "epoch": 1.0919298245614035, + "grad_norm": 1.2828936576843262, + "learning_rate": 4.43652134741633e-05, + "loss": 0.4643, + "step": 7780 + }, + { + "epoch": 1.0933333333333333, + "grad_norm": 1.2443900108337402, + "learning_rate": 4.435125514443258e-05, + "loss": 0.562, + "step": 7790 + }, + { + "epoch": 1.0947368421052632, + "grad_norm": 1.5212448835372925, + "learning_rate": 4.4337281749086477e-05, + "loss": 0.5022, + "step": 7800 + }, + { + "epoch": 1.096140350877193, + "grad_norm": 2.2898428440093994, + "learning_rate": 4.432329329900375e-05, + "loss": 0.5462, + "step": 7810 + }, + { + "epoch": 1.0975438596491227, + "grad_norm": 1.3173396587371826, + "learning_rate": 4.4309289805074895e-05, + "loss": 0.5103, + "step": 7820 + }, + { + "epoch": 1.0989473684210527, + "grad_norm": 1.3435895442962646, + "learning_rate": 4.42952712782021e-05, + "loss": 0.5588, + "step": 7830 + }, + { + "epoch": 1.1003508771929824, + "grad_norm": 1.1860660314559937, + "learning_rate": 4.428123772929928e-05, + "loss": 0.5107, + "step": 7840 + }, + { + "epoch": 1.1017543859649124, + "grad_norm": 1.2616344690322876, + "learning_rate": 4.426718916929202e-05, + "loss": 0.463, + "step": 7850 + }, + { + "epoch": 1.1031578947368421, + "grad_norm": 0.8766297101974487, + "learning_rate": 4.425312560911762e-05, + "loss": 0.4801, + "step": 7860 + }, + { + "epoch": 1.1045614035087719, + "grad_norm": 1.4563919305801392, + "learning_rate": 4.4239047059725035e-05, + "loss": 0.4755, + "step": 7870 + }, + { + "epoch": 1.1059649122807018, + "grad_norm": 1.3378584384918213, + "learning_rate": 4.422495353207491e-05, + "loss": 0.4449, + "step": 7880 + }, + { + "epoch": 1.1073684210526316, + "grad_norm": 1.259637713432312, + "learning_rate": 4.4210845037139525e-05, + "loss": 0.4613, + "step": 7890 + }, + { + "epoch": 1.1087719298245613, + "grad_norm": 1.8832120895385742, + "learning_rate": 4.419672158590282e-05, + "loss": 0.6132, + "step": 7900 + }, + { + "epoch": 1.1101754385964913, + "grad_norm": 1.7765206098556519, + "learning_rate": 4.4182583189360415e-05, + "loss": 0.5235, + "step": 7910 + }, + { + "epoch": 1.111578947368421, + "grad_norm": 1.5252950191497803, + "learning_rate": 4.416842985851951e-05, + "loss": 0.5066, + "step": 7920 + }, + { + "epoch": 1.112982456140351, + "grad_norm": 1.145727515220642, + "learning_rate": 4.415426160439897e-05, + "loss": 0.5148, + "step": 7930 + }, + { + "epoch": 1.1143859649122807, + "grad_norm": 1.413393259048462, + "learning_rate": 4.414007843802927e-05, + "loss": 0.4731, + "step": 7940 + }, + { + "epoch": 1.1157894736842104, + "grad_norm": 1.227738618850708, + "learning_rate": 4.412588037045248e-05, + "loss": 0.4657, + "step": 7950 + }, + { + "epoch": 1.1171929824561404, + "grad_norm": 1.1237843036651611, + "learning_rate": 4.411166741272228e-05, + "loss": 0.5292, + "step": 7960 + }, + { + "epoch": 1.1185964912280701, + "grad_norm": 1.5785701274871826, + "learning_rate": 4.4097439575903964e-05, + "loss": 0.5086, + "step": 7970 + }, + { + "epoch": 1.12, + "grad_norm": 0.8202313780784607, + "learning_rate": 4.408319687107437e-05, + "loss": 0.4074, + "step": 7980 + }, + { + "epoch": 1.1214035087719298, + "grad_norm": 1.6983180046081543, + "learning_rate": 4.406893930932195e-05, + "loss": 0.5302, + "step": 7990 + }, + { + "epoch": 1.1228070175438596, + "grad_norm": 1.3259834051132202, + "learning_rate": 4.4054666901746685e-05, + "loss": 0.5533, + "step": 8000 + }, + { + "epoch": 1.1228070175438596, + "eval_loss": 0.6547604203224182, + "eval_runtime": 43.9015, + "eval_samples_per_second": 34.167, + "eval_steps_per_second": 8.542, + "step": 8000 + }, + { + "epoch": 1.1242105263157895, + "grad_norm": 1.5617778301239014, + "learning_rate": 4.404037965946015e-05, + "loss": 0.5533, + "step": 8010 + }, + { + "epoch": 1.1256140350877193, + "grad_norm": 1.3925784826278687, + "learning_rate": 4.402607759358545e-05, + "loss": 0.573, + "step": 8020 + }, + { + "epoch": 1.127017543859649, + "grad_norm": 2.007066011428833, + "learning_rate": 4.401176071525722e-05, + "loss": 0.6002, + "step": 8030 + }, + { + "epoch": 1.128421052631579, + "grad_norm": 1.2479066848754883, + "learning_rate": 4.399742903562166e-05, + "loss": 0.5412, + "step": 8040 + }, + { + "epoch": 1.1298245614035087, + "grad_norm": 2.116882562637329, + "learning_rate": 4.3983082565836454e-05, + "loss": 0.5516, + "step": 8050 + }, + { + "epoch": 1.1312280701754387, + "grad_norm": 1.0991559028625488, + "learning_rate": 4.3968721317070835e-05, + "loss": 0.5142, + "step": 8060 + }, + { + "epoch": 1.1326315789473684, + "grad_norm": 1.5136295557022095, + "learning_rate": 4.395434530050553e-05, + "loss": 0.4974, + "step": 8070 + }, + { + "epoch": 1.1340350877192982, + "grad_norm": 1.6304662227630615, + "learning_rate": 4.393995452733274e-05, + "loss": 0.5921, + "step": 8080 + }, + { + "epoch": 1.1354385964912281, + "grad_norm": 1.1499663591384888, + "learning_rate": 4.392554900875619e-05, + "loss": 0.5516, + "step": 8090 + }, + { + "epoch": 1.1368421052631579, + "grad_norm": 1.14556884765625, + "learning_rate": 4.3911128755991085e-05, + "loss": 0.4377, + "step": 8100 + }, + { + "epoch": 1.1382456140350876, + "grad_norm": 2.168900728225708, + "learning_rate": 4.3896693780264054e-05, + "loss": 0.5489, + "step": 8110 + }, + { + "epoch": 1.1396491228070176, + "grad_norm": 1.8360158205032349, + "learning_rate": 4.388224409281324e-05, + "loss": 0.4883, + "step": 8120 + }, + { + "epoch": 1.1410526315789473, + "grad_norm": 1.3180638551712036, + "learning_rate": 4.3867779704888225e-05, + "loss": 0.5316, + "step": 8130 + }, + { + "epoch": 1.1424561403508773, + "grad_norm": 1.194568157196045, + "learning_rate": 4.385330062775001e-05, + "loss": 0.5961, + "step": 8140 + }, + { + "epoch": 1.143859649122807, + "grad_norm": 1.7998569011688232, + "learning_rate": 4.383880687267107e-05, + "loss": 0.5839, + "step": 8150 + }, + { + "epoch": 1.1452631578947368, + "grad_norm": 1.313109040260315, + "learning_rate": 4.3824298450935284e-05, + "loss": 0.4834, + "step": 8160 + }, + { + "epoch": 1.1466666666666667, + "grad_norm": 1.082961916923523, + "learning_rate": 4.380977537383796e-05, + "loss": 0.5543, + "step": 8170 + }, + { + "epoch": 1.1480701754385965, + "grad_norm": 1.8646924495697021, + "learning_rate": 4.37952376526858e-05, + "loss": 0.5548, + "step": 8180 + }, + { + "epoch": 1.1494736842105264, + "grad_norm": 0.8285521268844604, + "learning_rate": 4.378068529879693e-05, + "loss": 0.4596, + "step": 8190 + }, + { + "epoch": 1.1508771929824562, + "grad_norm": 1.6364754438400269, + "learning_rate": 4.376611832350085e-05, + "loss": 0.5165, + "step": 8200 + }, + { + "epoch": 1.152280701754386, + "grad_norm": 1.4383785724639893, + "learning_rate": 4.3751536738138454e-05, + "loss": 0.5085, + "step": 8210 + }, + { + "epoch": 1.1536842105263159, + "grad_norm": 1.0523866415023804, + "learning_rate": 4.3736940554062e-05, + "loss": 0.5485, + "step": 8220 + }, + { + "epoch": 1.1550877192982456, + "grad_norm": 2.191441535949707, + "learning_rate": 4.372232978263513e-05, + "loss": 0.5049, + "step": 8230 + }, + { + "epoch": 1.1564912280701753, + "grad_norm": 1.923846960067749, + "learning_rate": 4.3707704435232816e-05, + "loss": 0.4833, + "step": 8240 + }, + { + "epoch": 1.1578947368421053, + "grad_norm": 1.855549693107605, + "learning_rate": 4.36930645232414e-05, + "loss": 0.5616, + "step": 8250 + }, + { + "epoch": 1.159298245614035, + "grad_norm": 1.620718240737915, + "learning_rate": 4.367841005805855e-05, + "loss": 0.5448, + "step": 8260 + }, + { + "epoch": 1.1607017543859648, + "grad_norm": 1.5233041048049927, + "learning_rate": 4.366374105109327e-05, + "loss": 0.4714, + "step": 8270 + }, + { + "epoch": 1.1621052631578948, + "grad_norm": 1.2673170566558838, + "learning_rate": 4.364905751376589e-05, + "loss": 0.5994, + "step": 8280 + }, + { + "epoch": 1.1635087719298245, + "grad_norm": 1.4488414525985718, + "learning_rate": 4.3634359457508046e-05, + "loss": 0.5633, + "step": 8290 + }, + { + "epoch": 1.1649122807017545, + "grad_norm": 2.300537586212158, + "learning_rate": 4.3619646893762675e-05, + "loss": 0.5566, + "step": 8300 + }, + { + "epoch": 1.1663157894736842, + "grad_norm": 0.8948672413825989, + "learning_rate": 4.360491983398402e-05, + "loss": 0.4631, + "step": 8310 + }, + { + "epoch": 1.167719298245614, + "grad_norm": 1.453062653541565, + "learning_rate": 4.3590178289637585e-05, + "loss": 0.4525, + "step": 8320 + }, + { + "epoch": 1.169122807017544, + "grad_norm": 1.201952576637268, + "learning_rate": 4.357542227220019e-05, + "loss": 0.4501, + "step": 8330 + }, + { + "epoch": 1.1705263157894736, + "grad_norm": 1.3226593732833862, + "learning_rate": 4.356065179315988e-05, + "loss": 0.561, + "step": 8340 + }, + { + "epoch": 1.1719298245614036, + "grad_norm": 0.9465067386627197, + "learning_rate": 4.354586686401599e-05, + "loss": 0.513, + "step": 8350 + }, + { + "epoch": 1.1733333333333333, + "grad_norm": 1.12758207321167, + "learning_rate": 4.353106749627909e-05, + "loss": 0.4915, + "step": 8360 + }, + { + "epoch": 1.174736842105263, + "grad_norm": 2.4800631999969482, + "learning_rate": 4.3516253701471e-05, + "loss": 0.5193, + "step": 8370 + }, + { + "epoch": 1.176140350877193, + "grad_norm": 0.8092995285987854, + "learning_rate": 4.350142549112476e-05, + "loss": 0.5881, + "step": 8380 + }, + { + "epoch": 1.1775438596491228, + "grad_norm": 1.0919562578201294, + "learning_rate": 4.348658287678465e-05, + "loss": 0.5378, + "step": 8390 + }, + { + "epoch": 1.1789473684210527, + "grad_norm": 1.3849503993988037, + "learning_rate": 4.347172587000614e-05, + "loss": 0.5356, + "step": 8400 + }, + { + "epoch": 1.1803508771929825, + "grad_norm": 2.6931231021881104, + "learning_rate": 4.345685448235594e-05, + "loss": 0.5849, + "step": 8410 + }, + { + "epoch": 1.1817543859649122, + "grad_norm": 1.188615083694458, + "learning_rate": 4.3441968725411905e-05, + "loss": 0.5157, + "step": 8420 + }, + { + "epoch": 1.1831578947368422, + "grad_norm": 2.472364902496338, + "learning_rate": 4.342706861076313e-05, + "loss": 0.6508, + "step": 8430 + }, + { + "epoch": 1.184561403508772, + "grad_norm": 1.9964373111724854, + "learning_rate": 4.341215415000987e-05, + "loss": 0.525, + "step": 8440 + }, + { + "epoch": 1.1859649122807017, + "grad_norm": 1.2414706945419312, + "learning_rate": 4.339722535476353e-05, + "loss": 0.6218, + "step": 8450 + }, + { + "epoch": 1.1873684210526316, + "grad_norm": 1.69329035282135, + "learning_rate": 4.3382282236646684e-05, + "loss": 0.5375, + "step": 8460 + }, + { + "epoch": 1.1887719298245614, + "grad_norm": 0.9698866605758667, + "learning_rate": 4.336732480729306e-05, + "loss": 0.5454, + "step": 8470 + }, + { + "epoch": 1.190175438596491, + "grad_norm": 1.450108528137207, + "learning_rate": 4.335235307834755e-05, + "loss": 0.5507, + "step": 8480 + }, + { + "epoch": 1.191578947368421, + "grad_norm": 1.593243956565857, + "learning_rate": 4.333736706146615e-05, + "loss": 0.5172, + "step": 8490 + }, + { + "epoch": 1.1929824561403508, + "grad_norm": 0.9896023869514465, + "learning_rate": 4.332236676831598e-05, + "loss": 0.5401, + "step": 8500 + }, + { + "epoch": 1.1943859649122808, + "grad_norm": 1.9976292848587036, + "learning_rate": 4.330735221057529e-05, + "loss": 0.4734, + "step": 8510 + }, + { + "epoch": 1.1957894736842105, + "grad_norm": 0.9883520007133484, + "learning_rate": 4.329232339993342e-05, + "loss": 0.4783, + "step": 8520 + }, + { + "epoch": 1.1971929824561403, + "grad_norm": 1.6024073362350464, + "learning_rate": 4.327728034809082e-05, + "loss": 0.5843, + "step": 8530 + }, + { + "epoch": 1.1985964912280702, + "grad_norm": 1.5246341228485107, + "learning_rate": 4.326222306675902e-05, + "loss": 0.4922, + "step": 8540 + }, + { + "epoch": 1.2, + "grad_norm": 1.8065810203552246, + "learning_rate": 4.324715156766064e-05, + "loss": 0.6196, + "step": 8550 + }, + { + "epoch": 1.20140350877193, + "grad_norm": 1.25635826587677, + "learning_rate": 4.3232065862529334e-05, + "loss": 0.4713, + "step": 8560 + }, + { + "epoch": 1.2028070175438597, + "grad_norm": 1.874711036682129, + "learning_rate": 4.321696596310987e-05, + "loss": 0.5015, + "step": 8570 + }, + { + "epoch": 1.2042105263157894, + "grad_norm": 1.4795438051223755, + "learning_rate": 4.3201851881158004e-05, + "loss": 0.569, + "step": 8580 + }, + { + "epoch": 1.2056140350877194, + "grad_norm": 1.1996725797653198, + "learning_rate": 4.31867236284406e-05, + "loss": 0.5079, + "step": 8590 + }, + { + "epoch": 1.207017543859649, + "grad_norm": 1.1284021139144897, + "learning_rate": 4.31715812167355e-05, + "loss": 0.5132, + "step": 8600 + }, + { + "epoch": 1.208421052631579, + "grad_norm": 1.3568930625915527, + "learning_rate": 4.3156424657831596e-05, + "loss": 0.5907, + "step": 8610 + }, + { + "epoch": 1.2098245614035088, + "grad_norm": 2.9363083839416504, + "learning_rate": 4.3141253963528795e-05, + "loss": 0.6086, + "step": 8620 + }, + { + "epoch": 1.2112280701754385, + "grad_norm": 1.58176589012146, + "learning_rate": 4.3126069145637987e-05, + "loss": 0.4966, + "step": 8630 + }, + { + "epoch": 1.2126315789473685, + "grad_norm": 1.1019052267074585, + "learning_rate": 4.3110870215981095e-05, + "loss": 0.5713, + "step": 8640 + }, + { + "epoch": 1.2140350877192982, + "grad_norm": 1.8327674865722656, + "learning_rate": 4.309565718639098e-05, + "loss": 0.5538, + "step": 8650 + }, + { + "epoch": 1.215438596491228, + "grad_norm": 1.4098116159439087, + "learning_rate": 4.308043006871153e-05, + "loss": 0.5065, + "step": 8660 + }, + { + "epoch": 1.216842105263158, + "grad_norm": 1.7343579530715942, + "learning_rate": 4.306518887479758e-05, + "loss": 0.495, + "step": 8670 + }, + { + "epoch": 1.2182456140350877, + "grad_norm": 1.1002309322357178, + "learning_rate": 4.3049933616514895e-05, + "loss": 0.5217, + "step": 8680 + }, + { + "epoch": 1.2196491228070174, + "grad_norm": 1.6965640783309937, + "learning_rate": 4.303466430574024e-05, + "loss": 0.5196, + "step": 8690 + }, + { + "epoch": 1.2210526315789474, + "grad_norm": 2.227039337158203, + "learning_rate": 4.301938095436129e-05, + "loss": 0.4687, + "step": 8700 + }, + { + "epoch": 1.2224561403508771, + "grad_norm": 1.9886293411254883, + "learning_rate": 4.300408357427666e-05, + "loss": 0.6043, + "step": 8710 + }, + { + "epoch": 1.223859649122807, + "grad_norm": 1.9546360969543457, + "learning_rate": 4.298877217739587e-05, + "loss": 0.5359, + "step": 8720 + }, + { + "epoch": 1.2252631578947368, + "grad_norm": 1.1686962842941284, + "learning_rate": 4.29734467756394e-05, + "loss": 0.4502, + "step": 8730 + }, + { + "epoch": 1.2266666666666666, + "grad_norm": 1.1630245447158813, + "learning_rate": 4.2958107380938564e-05, + "loss": 0.4823, + "step": 8740 + }, + { + "epoch": 1.2280701754385965, + "grad_norm": 1.3308658599853516, + "learning_rate": 4.294275400523564e-05, + "loss": 0.4295, + "step": 8750 + }, + { + "epoch": 1.2294736842105263, + "grad_norm": 1.694773554801941, + "learning_rate": 4.2927386660483726e-05, + "loss": 0.4983, + "step": 8760 + }, + { + "epoch": 1.2308771929824562, + "grad_norm": 0.8299292325973511, + "learning_rate": 4.291200535864684e-05, + "loss": 0.5405, + "step": 8770 + }, + { + "epoch": 1.232280701754386, + "grad_norm": 1.7378555536270142, + "learning_rate": 4.289661011169986e-05, + "loss": 0.5094, + "step": 8780 + }, + { + "epoch": 1.2336842105263157, + "grad_norm": 1.8789598941802979, + "learning_rate": 4.28812009316285e-05, + "loss": 0.5622, + "step": 8790 + }, + { + "epoch": 1.2350877192982457, + "grad_norm": 1.2347322702407837, + "learning_rate": 4.286577783042934e-05, + "loss": 0.4577, + "step": 8800 + }, + { + "epoch": 1.2364912280701754, + "grad_norm": 1.610954761505127, + "learning_rate": 4.285034082010981e-05, + "loss": 0.6015, + "step": 8810 + }, + { + "epoch": 1.2378947368421054, + "grad_norm": 0.8974846005439758, + "learning_rate": 4.2834889912688126e-05, + "loss": 0.5716, + "step": 8820 + }, + { + "epoch": 1.2392982456140351, + "grad_norm": 1.1207072734832764, + "learning_rate": 4.281942512019336e-05, + "loss": 0.5634, + "step": 8830 + }, + { + "epoch": 1.2407017543859649, + "grad_norm": 2.1318647861480713, + "learning_rate": 4.2803946454665376e-05, + "loss": 0.4982, + "step": 8840 + }, + { + "epoch": 1.2421052631578948, + "grad_norm": 1.3747590780258179, + "learning_rate": 4.2788453928154855e-05, + "loss": 0.5006, + "step": 8850 + }, + { + "epoch": 1.2435087719298246, + "grad_norm": 0.8502065539360046, + "learning_rate": 4.2772947552723266e-05, + "loss": 0.4901, + "step": 8860 + }, + { + "epoch": 1.2449122807017543, + "grad_norm": 1.888156771659851, + "learning_rate": 4.275742734044283e-05, + "loss": 0.4847, + "step": 8870 + }, + { + "epoch": 1.2463157894736843, + "grad_norm": 2.0071113109588623, + "learning_rate": 4.274189330339658e-05, + "loss": 0.5224, + "step": 8880 + }, + { + "epoch": 1.247719298245614, + "grad_norm": 1.0914371013641357, + "learning_rate": 4.272634545367831e-05, + "loss": 0.4698, + "step": 8890 + }, + { + "epoch": 1.2491228070175437, + "grad_norm": 1.4466750621795654, + "learning_rate": 4.271078380339252e-05, + "loss": 0.5801, + "step": 8900 + }, + { + "epoch": 1.2505263157894737, + "grad_norm": 1.5080820322036743, + "learning_rate": 4.269520836465452e-05, + "loss": 0.6584, + "step": 8910 + }, + { + "epoch": 1.2519298245614034, + "grad_norm": 1.510321855545044, + "learning_rate": 4.2679619149590304e-05, + "loss": 0.5752, + "step": 8920 + }, + { + "epoch": 1.2533333333333334, + "grad_norm": 1.527969479560852, + "learning_rate": 4.266401617033662e-05, + "loss": 0.4829, + "step": 8930 + }, + { + "epoch": 1.2547368421052632, + "grad_norm": 1.2593231201171875, + "learning_rate": 4.264839943904091e-05, + "loss": 0.5411, + "step": 8940 + }, + { + "epoch": 1.256140350877193, + "grad_norm": 1.490929365158081, + "learning_rate": 4.2632768967861345e-05, + "loss": 0.5089, + "step": 8950 + }, + { + "epoch": 1.2575438596491229, + "grad_norm": 0.7822336554527283, + "learning_rate": 4.261712476896679e-05, + "loss": 0.6257, + "step": 8960 + }, + { + "epoch": 1.2589473684210526, + "grad_norm": 1.331175446510315, + "learning_rate": 4.2601466854536774e-05, + "loss": 0.5403, + "step": 8970 + }, + { + "epoch": 1.2603508771929826, + "grad_norm": 1.4372813701629639, + "learning_rate": 4.2585795236761526e-05, + "loss": 0.5305, + "step": 8980 + }, + { + "epoch": 1.2617543859649123, + "grad_norm": 1.873630166053772, + "learning_rate": 4.257010992784194e-05, + "loss": 0.5776, + "step": 8990 + }, + { + "epoch": 1.263157894736842, + "grad_norm": 1.2255460023880005, + "learning_rate": 4.255441093998956e-05, + "loss": 0.5772, + "step": 9000 + }, + { + "epoch": 1.264561403508772, + "grad_norm": 1.3667577505111694, + "learning_rate": 4.253869828542659e-05, + "loss": 0.589, + "step": 9010 + }, + { + "epoch": 1.2659649122807017, + "grad_norm": 1.189122200012207, + "learning_rate": 4.2522971976385876e-05, + "loss": 0.5012, + "step": 9020 + }, + { + "epoch": 1.2673684210526317, + "grad_norm": 2.0150930881500244, + "learning_rate": 4.250723202511089e-05, + "loss": 0.4813, + "step": 9030 + }, + { + "epoch": 1.2687719298245614, + "grad_norm": 1.83956778049469, + "learning_rate": 4.2491478443855704e-05, + "loss": 0.513, + "step": 9040 + }, + { + "epoch": 1.2701754385964912, + "grad_norm": 1.8281301259994507, + "learning_rate": 4.247571124488504e-05, + "loss": 0.6229, + "step": 9050 + }, + { + "epoch": 1.271578947368421, + "grad_norm": 1.5498483180999756, + "learning_rate": 4.2459930440474194e-05, + "loss": 0.5493, + "step": 9060 + }, + { + "epoch": 1.2729824561403509, + "grad_norm": 1.680643081665039, + "learning_rate": 4.2444136042909064e-05, + "loss": 0.4845, + "step": 9070 + }, + { + "epoch": 1.2743859649122806, + "grad_norm": 1.4468814134597778, + "learning_rate": 4.2428328064486134e-05, + "loss": 0.5174, + "step": 9080 + }, + { + "epoch": 1.2757894736842106, + "grad_norm": 1.8637295961380005, + "learning_rate": 4.2412506517512456e-05, + "loss": 0.501, + "step": 9090 + }, + { + "epoch": 1.2771929824561403, + "grad_norm": 1.8078296184539795, + "learning_rate": 4.239667141430564e-05, + "loss": 0.6422, + "step": 9100 + }, + { + "epoch": 1.27859649122807, + "grad_norm": 1.8999830484390259, + "learning_rate": 4.238082276719387e-05, + "loss": 0.5323, + "step": 9110 + }, + { + "epoch": 1.28, + "grad_norm": 0.8832138776779175, + "learning_rate": 4.236496058851585e-05, + "loss": 0.4542, + "step": 9120 + }, + { + "epoch": 1.2814035087719298, + "grad_norm": 1.2980352640151978, + "learning_rate": 4.234908489062083e-05, + "loss": 0.5697, + "step": 9130 + }, + { + "epoch": 1.2828070175438597, + "grad_norm": 1.667039394378662, + "learning_rate": 4.233319568586859e-05, + "loss": 0.5108, + "step": 9140 + }, + { + "epoch": 1.2842105263157895, + "grad_norm": 1.6664785146713257, + "learning_rate": 4.231729298662942e-05, + "loss": 0.4472, + "step": 9150 + }, + { + "epoch": 1.2856140350877192, + "grad_norm": 1.4384082555770874, + "learning_rate": 4.230137680528411e-05, + "loss": 0.62, + "step": 9160 + }, + { + "epoch": 1.2870175438596492, + "grad_norm": 1.851901888847351, + "learning_rate": 4.228544715422395e-05, + "loss": 0.5226, + "step": 9170 + }, + { + "epoch": 1.288421052631579, + "grad_norm": 1.372755527496338, + "learning_rate": 4.2269504045850744e-05, + "loss": 0.5492, + "step": 9180 + }, + { + "epoch": 1.2898245614035089, + "grad_norm": 1.7806882858276367, + "learning_rate": 4.225354749257673e-05, + "loss": 0.5359, + "step": 9190 + }, + { + "epoch": 1.2912280701754386, + "grad_norm": 1.4967597723007202, + "learning_rate": 4.2237577506824624e-05, + "loss": 0.535, + "step": 9200 + }, + { + "epoch": 1.2926315789473684, + "grad_norm": 1.220828890800476, + "learning_rate": 4.222159410102761e-05, + "loss": 0.4581, + "step": 9210 + }, + { + "epoch": 1.2940350877192983, + "grad_norm": 1.9541898965835571, + "learning_rate": 4.220559728762933e-05, + "loss": 0.5109, + "step": 9220 + }, + { + "epoch": 1.295438596491228, + "grad_norm": 0.9027903079986572, + "learning_rate": 4.2189587079083846e-05, + "loss": 0.4501, + "step": 9230 + }, + { + "epoch": 1.296842105263158, + "grad_norm": 2.4572014808654785, + "learning_rate": 4.217356348785565e-05, + "loss": 0.5574, + "step": 9240 + }, + { + "epoch": 1.2982456140350878, + "grad_norm": 1.5705862045288086, + "learning_rate": 4.215752652641967e-05, + "loss": 0.5558, + "step": 9250 + }, + { + "epoch": 1.2996491228070175, + "grad_norm": 1.5693955421447754, + "learning_rate": 4.21414762072612e-05, + "loss": 0.4734, + "step": 9260 + }, + { + "epoch": 1.3010526315789472, + "grad_norm": 1.4699418544769287, + "learning_rate": 4.2125412542876e-05, + "loss": 0.574, + "step": 9270 + }, + { + "epoch": 1.3024561403508772, + "grad_norm": 1.8956423997879028, + "learning_rate": 4.210933554577016e-05, + "loss": 0.4505, + "step": 9280 + }, + { + "epoch": 1.303859649122807, + "grad_norm": 1.8722734451293945, + "learning_rate": 4.209324522846018e-05, + "loss": 0.5021, + "step": 9290 + }, + { + "epoch": 1.305263157894737, + "grad_norm": 1.3624267578125, + "learning_rate": 4.207714160347292e-05, + "loss": 0.4925, + "step": 9300 + }, + { + "epoch": 1.3066666666666666, + "grad_norm": 2.2316009998321533, + "learning_rate": 4.206102468334561e-05, + "loss": 0.6289, + "step": 9310 + }, + { + "epoch": 1.3080701754385964, + "grad_norm": 2.3992788791656494, + "learning_rate": 4.2044894480625825e-05, + "loss": 0.5387, + "step": 9320 + }, + { + "epoch": 1.3094736842105263, + "grad_norm": 1.9581996202468872, + "learning_rate": 4.202875100787147e-05, + "loss": 0.5788, + "step": 9330 + }, + { + "epoch": 1.310877192982456, + "grad_norm": 1.5940258502960205, + "learning_rate": 4.201259427765081e-05, + "loss": 0.5313, + "step": 9340 + }, + { + "epoch": 1.312280701754386, + "grad_norm": 1.2207392454147339, + "learning_rate": 4.1996424302542404e-05, + "loss": 0.5948, + "step": 9350 + }, + { + "epoch": 1.3136842105263158, + "grad_norm": 1.743915319442749, + "learning_rate": 4.198024109513512e-05, + "loss": 0.4913, + "step": 9360 + }, + { + "epoch": 1.3150877192982455, + "grad_norm": 1.9989562034606934, + "learning_rate": 4.196404466802816e-05, + "loss": 0.4895, + "step": 9370 + }, + { + "epoch": 1.3164912280701755, + "grad_norm": 1.8793307542800903, + "learning_rate": 4.194783503383098e-05, + "loss": 0.5537, + "step": 9380 + }, + { + "epoch": 1.3178947368421052, + "grad_norm": 1.9246269464492798, + "learning_rate": 4.193161220516334e-05, + "loss": 0.5641, + "step": 9390 + }, + { + "epoch": 1.3192982456140352, + "grad_norm": 1.5612519979476929, + "learning_rate": 4.191537619465529e-05, + "loss": 0.464, + "step": 9400 + }, + { + "epoch": 1.320701754385965, + "grad_norm": 0.9451802968978882, + "learning_rate": 4.189912701494709e-05, + "loss": 0.4657, + "step": 9410 + }, + { + "epoch": 1.3221052631578947, + "grad_norm": 1.790861964225769, + "learning_rate": 4.1882864678689296e-05, + "loss": 0.5113, + "step": 9420 + }, + { + "epoch": 1.3235087719298246, + "grad_norm": 1.9305384159088135, + "learning_rate": 4.186658919854269e-05, + "loss": 0.5593, + "step": 9430 + }, + { + "epoch": 1.3249122807017544, + "grad_norm": 2.051849603652954, + "learning_rate": 4.1850300587178304e-05, + "loss": 0.4578, + "step": 9440 + }, + { + "epoch": 1.3263157894736843, + "grad_norm": 1.7359352111816406, + "learning_rate": 4.183399885727737e-05, + "loss": 0.5637, + "step": 9450 + }, + { + "epoch": 1.327719298245614, + "grad_norm": 1.373119592666626, + "learning_rate": 4.181768402153135e-05, + "loss": 0.5491, + "step": 9460 + }, + { + "epoch": 1.3291228070175438, + "grad_norm": 1.0692249536514282, + "learning_rate": 4.1801356092641886e-05, + "loss": 0.5558, + "step": 9470 + }, + { + "epoch": 1.3305263157894736, + "grad_norm": 1.0997167825698853, + "learning_rate": 4.178501508332085e-05, + "loss": 0.4543, + "step": 9480 + }, + { + "epoch": 1.3319298245614035, + "grad_norm": 1.5097479820251465, + "learning_rate": 4.176866100629027e-05, + "loss": 0.5832, + "step": 9490 + }, + { + "epoch": 1.3333333333333333, + "grad_norm": 1.5708390474319458, + "learning_rate": 4.175229387428235e-05, + "loss": 0.5378, + "step": 9500 + }, + { + "epoch": 1.3347368421052632, + "grad_norm": 1.6936376094818115, + "learning_rate": 4.1735913700039477e-05, + "loss": 0.5046, + "step": 9510 + }, + { + "epoch": 1.336140350877193, + "grad_norm": 0.9409717321395874, + "learning_rate": 4.171952049631416e-05, + "loss": 0.5171, + "step": 9520 + }, + { + "epoch": 1.3375438596491227, + "grad_norm": 1.803077220916748, + "learning_rate": 4.170311427586908e-05, + "loss": 0.5939, + "step": 9530 + }, + { + "epoch": 1.3389473684210527, + "grad_norm": 1.1349605321884155, + "learning_rate": 4.168669505147705e-05, + "loss": 0.5768, + "step": 9540 + }, + { + "epoch": 1.3403508771929824, + "grad_norm": 1.6885027885437012, + "learning_rate": 4.1670262835920996e-05, + "loss": 0.5029, + "step": 9550 + }, + { + "epoch": 1.3417543859649124, + "grad_norm": 1.278064489364624, + "learning_rate": 4.1653817641993936e-05, + "loss": 0.4611, + "step": 9560 + }, + { + "epoch": 1.343157894736842, + "grad_norm": 1.7429572343826294, + "learning_rate": 4.163735948249905e-05, + "loss": 0.5701, + "step": 9570 + }, + { + "epoch": 1.3445614035087718, + "grad_norm": 2.2477900981903076, + "learning_rate": 4.162088837024956e-05, + "loss": 0.5356, + "step": 9580 + }, + { + "epoch": 1.3459649122807018, + "grad_norm": 1.617583990097046, + "learning_rate": 4.16044043180688e-05, + "loss": 0.4985, + "step": 9590 + }, + { + "epoch": 1.3473684210526315, + "grad_norm": 1.5791269540786743, + "learning_rate": 4.158790733879017e-05, + "loss": 0.5036, + "step": 9600 + }, + { + "epoch": 1.3487719298245615, + "grad_norm": 1.9323596954345703, + "learning_rate": 4.1571397445257124e-05, + "loss": 0.5212, + "step": 9610 + }, + { + "epoch": 1.3501754385964913, + "grad_norm": 1.3054085969924927, + "learning_rate": 4.155487465032319e-05, + "loss": 0.5225, + "step": 9620 + }, + { + "epoch": 1.351578947368421, + "grad_norm": 1.5751895904541016, + "learning_rate": 4.153833896685193e-05, + "loss": 0.4985, + "step": 9630 + }, + { + "epoch": 1.352982456140351, + "grad_norm": 1.8643230199813843, + "learning_rate": 4.1521790407716936e-05, + "loss": 0.5386, + "step": 9640 + }, + { + "epoch": 1.3543859649122807, + "grad_norm": 1.0118595361709595, + "learning_rate": 4.150522898580183e-05, + "loss": 0.5283, + "step": 9650 + }, + { + "epoch": 1.3557894736842107, + "grad_norm": 1.9065098762512207, + "learning_rate": 4.148865471400024e-05, + "loss": 0.5684, + "step": 9660 + }, + { + "epoch": 1.3571929824561404, + "grad_norm": 2.0933990478515625, + "learning_rate": 4.147206760521582e-05, + "loss": 0.525, + "step": 9670 + }, + { + "epoch": 1.3585964912280701, + "grad_norm": 2.0099165439605713, + "learning_rate": 4.145546767236219e-05, + "loss": 0.5258, + "step": 9680 + }, + { + "epoch": 1.3599999999999999, + "grad_norm": 2.2704153060913086, + "learning_rate": 4.143885492836297e-05, + "loss": 0.5159, + "step": 9690 + }, + { + "epoch": 1.3614035087719298, + "grad_norm": 1.3344398736953735, + "learning_rate": 4.1422229386151754e-05, + "loss": 0.5656, + "step": 9700 + }, + { + "epoch": 1.3628070175438596, + "grad_norm": 2.356660842895508, + "learning_rate": 4.140559105867209e-05, + "loss": 0.488, + "step": 9710 + }, + { + "epoch": 1.3642105263157895, + "grad_norm": 1.0358322858810425, + "learning_rate": 4.1388939958877495e-05, + "loss": 0.457, + "step": 9720 + }, + { + "epoch": 1.3656140350877193, + "grad_norm": 1.4958525896072388, + "learning_rate": 4.137227609973141e-05, + "loss": 0.459, + "step": 9730 + }, + { + "epoch": 1.367017543859649, + "grad_norm": 1.9942265748977661, + "learning_rate": 4.135559949420723e-05, + "loss": 0.4794, + "step": 9740 + }, + { + "epoch": 1.368421052631579, + "grad_norm": 1.7793415784835815, + "learning_rate": 4.133891015528826e-05, + "loss": 0.5903, + "step": 9750 + }, + { + "epoch": 1.3698245614035087, + "grad_norm": 1.069421410560608, + "learning_rate": 4.132220809596772e-05, + "loss": 0.5521, + "step": 9760 + }, + { + "epoch": 1.3712280701754387, + "grad_norm": 0.8958350419998169, + "learning_rate": 4.1305493329248734e-05, + "loss": 0.4667, + "step": 9770 + }, + { + "epoch": 1.3726315789473684, + "grad_norm": 1.314070701599121, + "learning_rate": 4.128876586814433e-05, + "loss": 0.4307, + "step": 9780 + }, + { + "epoch": 1.3740350877192982, + "grad_norm": 1.3073476552963257, + "learning_rate": 4.127202572567741e-05, + "loss": 0.5016, + "step": 9790 + }, + { + "epoch": 1.3754385964912281, + "grad_norm": 1.695670247077942, + "learning_rate": 4.1255272914880735e-05, + "loss": 0.5489, + "step": 9800 + }, + { + "epoch": 1.3768421052631579, + "grad_norm": 1.6946247816085815, + "learning_rate": 4.1238507448796945e-05, + "loss": 0.488, + "step": 9810 + }, + { + "epoch": 1.3782456140350878, + "grad_norm": 1.3960559368133545, + "learning_rate": 4.122172934047855e-05, + "loss": 0.5739, + "step": 9820 + }, + { + "epoch": 1.3796491228070176, + "grad_norm": 1.4782212972640991, + "learning_rate": 4.120493860298786e-05, + "loss": 0.5036, + "step": 9830 + }, + { + "epoch": 1.3810526315789473, + "grad_norm": 1.7010905742645264, + "learning_rate": 4.1188135249397056e-05, + "loss": 0.4737, + "step": 9840 + }, + { + "epoch": 1.3824561403508773, + "grad_norm": 1.8230018615722656, + "learning_rate": 4.117131929278811e-05, + "loss": 0.5341, + "step": 9850 + }, + { + "epoch": 1.383859649122807, + "grad_norm": 1.5947978496551514, + "learning_rate": 4.1154490746252825e-05, + "loss": 0.4567, + "step": 9860 + }, + { + "epoch": 1.385263157894737, + "grad_norm": 2.077136516571045, + "learning_rate": 4.113764962289281e-05, + "loss": 0.5586, + "step": 9870 + }, + { + "epoch": 1.3866666666666667, + "grad_norm": 1.0836787223815918, + "learning_rate": 4.112079593581944e-05, + "loss": 0.5065, + "step": 9880 + }, + { + "epoch": 1.3880701754385965, + "grad_norm": 1.8127710819244385, + "learning_rate": 4.110392969815391e-05, + "loss": 0.5335, + "step": 9890 + }, + { + "epoch": 1.3894736842105262, + "grad_norm": 1.7939358949661255, + "learning_rate": 4.108705092302715e-05, + "loss": 0.5445, + "step": 9900 + }, + { + "epoch": 1.3908771929824562, + "grad_norm": 1.5963224172592163, + "learning_rate": 4.1070159623579855e-05, + "loss": 0.5146, + "step": 9910 + }, + { + "epoch": 1.392280701754386, + "grad_norm": 1.4976569414138794, + "learning_rate": 4.105325581296251e-05, + "loss": 0.4938, + "step": 9920 + }, + { + "epoch": 1.3936842105263159, + "grad_norm": 1.3835642337799072, + "learning_rate": 4.103633950433528e-05, + "loss": 0.5353, + "step": 9930 + }, + { + "epoch": 1.3950877192982456, + "grad_norm": 1.504701018333435, + "learning_rate": 4.1019410710868115e-05, + "loss": 0.4869, + "step": 9940 + }, + { + "epoch": 1.3964912280701753, + "grad_norm": 1.6695371866226196, + "learning_rate": 4.100246944574064e-05, + "loss": 0.4858, + "step": 9950 + }, + { + "epoch": 1.3978947368421053, + "grad_norm": 1.033554196357727, + "learning_rate": 4.098551572214223e-05, + "loss": 0.5173, + "step": 9960 + }, + { + "epoch": 1.399298245614035, + "grad_norm": 2.1895320415496826, + "learning_rate": 4.0968549553271926e-05, + "loss": 0.5862, + "step": 9970 + }, + { + "epoch": 1.400701754385965, + "grad_norm": 2.323758363723755, + "learning_rate": 4.095157095233848e-05, + "loss": 0.5312, + "step": 9980 + }, + { + "epoch": 1.4021052631578947, + "grad_norm": 1.7012853622436523, + "learning_rate": 4.093457993256031e-05, + "loss": 0.4668, + "step": 9990 + }, + { + "epoch": 1.4035087719298245, + "grad_norm": 1.5631529092788696, + "learning_rate": 4.0917576507165514e-05, + "loss": 0.5192, + "step": 10000 + }, + { + "epoch": 1.4035087719298245, + "eval_loss": 0.6501449942588806, + "eval_runtime": 43.9031, + "eval_samples_per_second": 34.166, + "eval_steps_per_second": 8.542, + "step": 10000 + }, + { + "epoch": 1.4049122807017544, + "grad_norm": 1.5768696069717407, + "learning_rate": 4.090056068939183e-05, + "loss": 0.6369, + "step": 10010 + }, + { + "epoch": 1.4063157894736842, + "grad_norm": 2.0955562591552734, + "learning_rate": 4.088353249248667e-05, + "loss": 0.4765, + "step": 10020 + }, + { + "epoch": 1.4077192982456141, + "grad_norm": 1.7173198461532593, + "learning_rate": 4.0866491929707064e-05, + "loss": 0.4858, + "step": 10030 + }, + { + "epoch": 1.4091228070175439, + "grad_norm": 1.091640591621399, + "learning_rate": 4.084943901431966e-05, + "loss": 0.4502, + "step": 10040 + }, + { + "epoch": 1.4105263157894736, + "grad_norm": 0.9591197967529297, + "learning_rate": 4.083237375960075e-05, + "loss": 0.5036, + "step": 10050 + }, + { + "epoch": 1.4119298245614036, + "grad_norm": 1.6289422512054443, + "learning_rate": 4.081529617883622e-05, + "loss": 0.5185, + "step": 10060 + }, + { + "epoch": 1.4133333333333333, + "grad_norm": 1.0051218271255493, + "learning_rate": 4.079820628532155e-05, + "loss": 0.4701, + "step": 10070 + }, + { + "epoch": 1.4147368421052633, + "grad_norm": 1.5907773971557617, + "learning_rate": 4.0781104092361813e-05, + "loss": 0.5406, + "step": 10080 + }, + { + "epoch": 1.416140350877193, + "grad_norm": 1.4398341178894043, + "learning_rate": 4.0763989613271635e-05, + "loss": 0.4963, + "step": 10090 + }, + { + "epoch": 1.4175438596491228, + "grad_norm": 1.8131810426712036, + "learning_rate": 4.0746862861375245e-05, + "loss": 0.5931, + "step": 10100 + }, + { + "epoch": 1.4189473684210525, + "grad_norm": 1.3968654870986938, + "learning_rate": 4.07297238500064e-05, + "loss": 0.4908, + "step": 10110 + }, + { + "epoch": 1.4203508771929825, + "grad_norm": 1.4002443552017212, + "learning_rate": 4.0712572592508394e-05, + "loss": 0.5732, + "step": 10120 + }, + { + "epoch": 1.4217543859649122, + "grad_norm": 1.8497573137283325, + "learning_rate": 4.069540910223409e-05, + "loss": 0.5323, + "step": 10130 + }, + { + "epoch": 1.4231578947368422, + "grad_norm": 1.6966348886489868, + "learning_rate": 4.067823339254584e-05, + "loss": 0.5727, + "step": 10140 + }, + { + "epoch": 1.424561403508772, + "grad_norm": 1.1128507852554321, + "learning_rate": 4.066104547681553e-05, + "loss": 0.5295, + "step": 10150 + }, + { + "epoch": 1.4259649122807017, + "grad_norm": 1.2667880058288574, + "learning_rate": 4.0643845368424545e-05, + "loss": 0.554, + "step": 10160 + }, + { + "epoch": 1.4273684210526316, + "grad_norm": 2.0188159942626953, + "learning_rate": 4.062663308076374e-05, + "loss": 0.5138, + "step": 10170 + }, + { + "epoch": 1.4287719298245614, + "grad_norm": 1.113797903060913, + "learning_rate": 4.0609408627233494e-05, + "loss": 0.543, + "step": 10180 + }, + { + "epoch": 1.4301754385964913, + "grad_norm": 1.800862193107605, + "learning_rate": 4.059217202124361e-05, + "loss": 0.5094, + "step": 10190 + }, + { + "epoch": 1.431578947368421, + "grad_norm": 1.7962946891784668, + "learning_rate": 4.0574923276213405e-05, + "loss": 0.5468, + "step": 10200 + }, + { + "epoch": 1.4329824561403508, + "grad_norm": 2.258661985397339, + "learning_rate": 4.0557662405571595e-05, + "loss": 0.6082, + "step": 10210 + }, + { + "epoch": 1.4343859649122808, + "grad_norm": 2.073396921157837, + "learning_rate": 4.054038942275637e-05, + "loss": 0.5164, + "step": 10220 + }, + { + "epoch": 1.4357894736842105, + "grad_norm": 1.6468226909637451, + "learning_rate": 4.052310434121533e-05, + "loss": 0.5451, + "step": 10230 + }, + { + "epoch": 1.4371929824561405, + "grad_norm": 2.4547080993652344, + "learning_rate": 4.050580717440552e-05, + "loss": 0.5821, + "step": 10240 + }, + { + "epoch": 1.4385964912280702, + "grad_norm": 1.6606553792953491, + "learning_rate": 4.048849793579337e-05, + "loss": 0.5088, + "step": 10250 + }, + { + "epoch": 1.44, + "grad_norm": 1.6139086484909058, + "learning_rate": 4.04711766388547e-05, + "loss": 0.5441, + "step": 10260 + }, + { + "epoch": 1.4414035087719297, + "grad_norm": 1.6367645263671875, + "learning_rate": 4.0453843297074756e-05, + "loss": 0.494, + "step": 10270 + }, + { + "epoch": 1.4428070175438596, + "grad_norm": 1.6214492321014404, + "learning_rate": 4.043649792394812e-05, + "loss": 0.571, + "step": 10280 + }, + { + "epoch": 1.4442105263157896, + "grad_norm": 1.8566523790359497, + "learning_rate": 4.041914053297878e-05, + "loss": 0.5845, + "step": 10290 + }, + { + "epoch": 1.4456140350877194, + "grad_norm": 1.105668067932129, + "learning_rate": 4.0401771137680046e-05, + "loss": 0.4655, + "step": 10300 + }, + { + "epoch": 1.447017543859649, + "grad_norm": 1.5444446802139282, + "learning_rate": 4.038438975157458e-05, + "loss": 0.4939, + "step": 10310 + }, + { + "epoch": 1.4484210526315788, + "grad_norm": 2.6764674186706543, + "learning_rate": 4.036699638819441e-05, + "loss": 0.6172, + "step": 10320 + }, + { + "epoch": 1.4498245614035088, + "grad_norm": 1.574623942375183, + "learning_rate": 4.0349591061080846e-05, + "loss": 0.4888, + "step": 10330 + }, + { + "epoch": 1.4512280701754385, + "grad_norm": 2.2457685470581055, + "learning_rate": 4.0332173783784536e-05, + "loss": 0.4427, + "step": 10340 + }, + { + "epoch": 1.4526315789473685, + "grad_norm": 1.68437659740448, + "learning_rate": 4.031474456986543e-05, + "loss": 0.4867, + "step": 10350 + }, + { + "epoch": 1.4540350877192982, + "grad_norm": 1.4421491622924805, + "learning_rate": 4.0297303432892775e-05, + "loss": 0.4401, + "step": 10360 + }, + { + "epoch": 1.455438596491228, + "grad_norm": 2.1541783809661865, + "learning_rate": 4.027985038644507e-05, + "loss": 0.546, + "step": 10370 + }, + { + "epoch": 1.456842105263158, + "grad_norm": 1.7601039409637451, + "learning_rate": 4.026238544411014e-05, + "loss": 0.5211, + "step": 10380 + }, + { + "epoch": 1.4582456140350877, + "grad_norm": 1.2212331295013428, + "learning_rate": 4.024490861948503e-05, + "loss": 0.4633, + "step": 10390 + }, + { + "epoch": 1.4596491228070176, + "grad_norm": 1.688339114189148, + "learning_rate": 4.022741992617603e-05, + "loss": 0.5898, + "step": 10400 + }, + { + "epoch": 1.4610526315789474, + "grad_norm": 0.9874732494354248, + "learning_rate": 4.020991937779872e-05, + "loss": 0.4944, + "step": 10410 + }, + { + "epoch": 1.4624561403508771, + "grad_norm": 0.9479324817657471, + "learning_rate": 4.019240698797785e-05, + "loss": 0.55, + "step": 10420 + }, + { + "epoch": 1.463859649122807, + "grad_norm": 2.4362101554870605, + "learning_rate": 4.017488277034742e-05, + "loss": 0.5103, + "step": 10430 + }, + { + "epoch": 1.4652631578947368, + "grad_norm": 1.659631371498108, + "learning_rate": 4.015734673855065e-05, + "loss": 0.5073, + "step": 10440 + }, + { + "epoch": 1.4666666666666668, + "grad_norm": 1.9145371913909912, + "learning_rate": 4.013979890623992e-05, + "loss": 0.5588, + "step": 10450 + }, + { + "epoch": 1.4680701754385965, + "grad_norm": 1.3370527029037476, + "learning_rate": 4.0122239287076834e-05, + "loss": 0.5984, + "step": 10460 + }, + { + "epoch": 1.4694736842105263, + "grad_norm": 1.9445977210998535, + "learning_rate": 4.010466789473215e-05, + "loss": 0.5437, + "step": 10470 + }, + { + "epoch": 1.470877192982456, + "grad_norm": 1.197405457496643, + "learning_rate": 4.008708474288581e-05, + "loss": 0.4573, + "step": 10480 + }, + { + "epoch": 1.472280701754386, + "grad_norm": 1.8886786699295044, + "learning_rate": 4.006948984522687e-05, + "loss": 0.5319, + "step": 10490 + }, + { + "epoch": 1.4736842105263157, + "grad_norm": 1.8042242527008057, + "learning_rate": 4.00518832154536e-05, + "loss": 0.4559, + "step": 10500 + }, + { + "epoch": 1.4750877192982457, + "grad_norm": 1.6905839443206787, + "learning_rate": 4.003426486727335e-05, + "loss": 0.4776, + "step": 10510 + }, + { + "epoch": 1.4764912280701754, + "grad_norm": 1.3357295989990234, + "learning_rate": 4.00166348144026e-05, + "loss": 0.5672, + "step": 10520 + }, + { + "epoch": 1.4778947368421052, + "grad_norm": 1.399383783340454, + "learning_rate": 3.9998993070566954e-05, + "loss": 0.5926, + "step": 10530 + }, + { + "epoch": 1.4792982456140351, + "grad_norm": 1.8520910739898682, + "learning_rate": 3.998133964950112e-05, + "loss": 0.4975, + "step": 10540 + }, + { + "epoch": 1.4807017543859649, + "grad_norm": 0.865352988243103, + "learning_rate": 3.9963674564948886e-05, + "loss": 0.5229, + "step": 10550 + }, + { + "epoch": 1.4821052631578948, + "grad_norm": 2.3526158332824707, + "learning_rate": 3.9945997830663126e-05, + "loss": 0.4995, + "step": 10560 + }, + { + "epoch": 1.4835087719298246, + "grad_norm": 2.1046128273010254, + "learning_rate": 3.992830946040579e-05, + "loss": 0.5464, + "step": 10570 + }, + { + "epoch": 1.4849122807017543, + "grad_norm": 2.0516717433929443, + "learning_rate": 3.9910609467947866e-05, + "loss": 0.5691, + "step": 10580 + }, + { + "epoch": 1.4863157894736843, + "grad_norm": 1.386889100074768, + "learning_rate": 3.989289786706942e-05, + "loss": 0.5982, + "step": 10590 + }, + { + "epoch": 1.487719298245614, + "grad_norm": 1.7038406133651733, + "learning_rate": 3.987517467155954e-05, + "loss": 0.5173, + "step": 10600 + }, + { + "epoch": 1.489122807017544, + "grad_norm": 1.3720016479492188, + "learning_rate": 3.985743989521633e-05, + "loss": 0.4406, + "step": 10610 + }, + { + "epoch": 1.4905263157894737, + "grad_norm": 1.5871185064315796, + "learning_rate": 3.9839693551846924e-05, + "loss": 0.5417, + "step": 10620 + }, + { + "epoch": 1.4919298245614034, + "grad_norm": 1.440131664276123, + "learning_rate": 3.982193565526747e-05, + "loss": 0.6226, + "step": 10630 + }, + { + "epoch": 1.4933333333333334, + "grad_norm": 1.91805100440979, + "learning_rate": 3.9804166219303086e-05, + "loss": 0.5337, + "step": 10640 + }, + { + "epoch": 1.4947368421052631, + "grad_norm": 1.9551035165786743, + "learning_rate": 3.9786385257787886e-05, + "loss": 0.5027, + "step": 10650 + }, + { + "epoch": 1.496140350877193, + "grad_norm": 1.93511962890625, + "learning_rate": 3.9768592784564974e-05, + "loss": 0.5676, + "step": 10660 + }, + { + "epoch": 1.4975438596491228, + "grad_norm": 1.6610795259475708, + "learning_rate": 3.975078881348638e-05, + "loss": 0.4542, + "step": 10670 + }, + { + "epoch": 1.4989473684210526, + "grad_norm": 1.6202747821807861, + "learning_rate": 3.9732973358413115e-05, + "loss": 0.5563, + "step": 10680 + }, + { + "epoch": 1.5003508771929823, + "grad_norm": 1.8837049007415771, + "learning_rate": 3.971514643321513e-05, + "loss": 0.4128, + "step": 10690 + }, + { + "epoch": 1.5017543859649123, + "grad_norm": 1.5047054290771484, + "learning_rate": 3.969730805177129e-05, + "loss": 0.4909, + "step": 10700 + }, + { + "epoch": 1.5031578947368422, + "grad_norm": 1.5055679082870483, + "learning_rate": 3.967945822796938e-05, + "loss": 0.5664, + "step": 10710 + }, + { + "epoch": 1.504561403508772, + "grad_norm": 1.2587159872055054, + "learning_rate": 3.9661596975706104e-05, + "loss": 0.5827, + "step": 10720 + }, + { + "epoch": 1.5059649122807017, + "grad_norm": 1.2444645166397095, + "learning_rate": 3.9643724308887065e-05, + "loss": 0.5105, + "step": 10730 + }, + { + "epoch": 1.5073684210526315, + "grad_norm": 2.214508295059204, + "learning_rate": 3.962584024142675e-05, + "loss": 0.5455, + "step": 10740 + }, + { + "epoch": 1.5087719298245614, + "grad_norm": 1.7483428716659546, + "learning_rate": 3.96079447872485e-05, + "loss": 0.5419, + "step": 10750 + }, + { + "epoch": 1.5101754385964914, + "grad_norm": 2.044471263885498, + "learning_rate": 3.9590037960284546e-05, + "loss": 0.5861, + "step": 10760 + }, + { + "epoch": 1.5115789473684211, + "grad_norm": 2.063427209854126, + "learning_rate": 3.9572119774475975e-05, + "loss": 0.5692, + "step": 10770 + }, + { + "epoch": 1.5129824561403509, + "grad_norm": 2.0187671184539795, + "learning_rate": 3.95541902437727e-05, + "loss": 0.5345, + "step": 10780 + }, + { + "epoch": 1.5143859649122806, + "grad_norm": 1.9781345129013062, + "learning_rate": 3.953624938213348e-05, + "loss": 0.5212, + "step": 10790 + }, + { + "epoch": 1.5157894736842106, + "grad_norm": 1.9213941097259521, + "learning_rate": 3.95182972035259e-05, + "loss": 0.4838, + "step": 10800 + }, + { + "epoch": 1.5171929824561403, + "grad_norm": 2.619076728820801, + "learning_rate": 3.950033372192633e-05, + "loss": 0.5011, + "step": 10810 + }, + { + "epoch": 1.5185964912280703, + "grad_norm": 1.8988882303237915, + "learning_rate": 3.948235895131997e-05, + "loss": 0.5043, + "step": 10820 + }, + { + "epoch": 1.52, + "grad_norm": 1.4304900169372559, + "learning_rate": 3.946437290570078e-05, + "loss": 0.5062, + "step": 10830 + }, + { + "epoch": 1.5214035087719298, + "grad_norm": 3.060408353805542, + "learning_rate": 3.944637559907152e-05, + "loss": 0.6164, + "step": 10840 + }, + { + "epoch": 1.5228070175438595, + "grad_norm": 0.9590080976486206, + "learning_rate": 3.9428367045443704e-05, + "loss": 0.5159, + "step": 10850 + }, + { + "epoch": 1.5242105263157895, + "grad_norm": 2.1461575031280518, + "learning_rate": 3.941034725883762e-05, + "loss": 0.6505, + "step": 10860 + }, + { + "epoch": 1.5256140350877194, + "grad_norm": 1.578477144241333, + "learning_rate": 3.939231625328229e-05, + "loss": 0.4808, + "step": 10870 + }, + { + "epoch": 1.5270175438596492, + "grad_norm": 1.8510093688964844, + "learning_rate": 3.9374274042815465e-05, + "loss": 0.5194, + "step": 10880 + }, + { + "epoch": 1.528421052631579, + "grad_norm": 1.9132167100906372, + "learning_rate": 3.935622064148361e-05, + "loss": 0.5079, + "step": 10890 + }, + { + "epoch": 1.5298245614035086, + "grad_norm": 1.3352388143539429, + "learning_rate": 3.9338156063341946e-05, + "loss": 0.4808, + "step": 10900 + }, + { + "epoch": 1.5312280701754386, + "grad_norm": 2.102167844772339, + "learning_rate": 3.932008032245434e-05, + "loss": 0.429, + "step": 10910 + }, + { + "epoch": 1.5326315789473686, + "grad_norm": 1.9585574865341187, + "learning_rate": 3.930199343289339e-05, + "loss": 0.489, + "step": 10920 + }, + { + "epoch": 1.5340350877192983, + "grad_norm": 1.905050277709961, + "learning_rate": 3.9283895408740355e-05, + "loss": 0.4881, + "step": 10930 + }, + { + "epoch": 1.535438596491228, + "grad_norm": 1.964416742324829, + "learning_rate": 3.926578626408517e-05, + "loss": 0.5913, + "step": 10940 + }, + { + "epoch": 1.5368421052631578, + "grad_norm": 2.3363118171691895, + "learning_rate": 3.924766601302642e-05, + "loss": 0.4719, + "step": 10950 + }, + { + "epoch": 1.5382456140350877, + "grad_norm": 1.7316786050796509, + "learning_rate": 3.9229534669671344e-05, + "loss": 0.5445, + "step": 10960 + }, + { + "epoch": 1.5396491228070175, + "grad_norm": 2.2813808917999268, + "learning_rate": 3.9211392248135815e-05, + "loss": 0.4989, + "step": 10970 + }, + { + "epoch": 1.5410526315789475, + "grad_norm": 0.9021309018135071, + "learning_rate": 3.9193238762544325e-05, + "loss": 0.5321, + "step": 10980 + }, + { + "epoch": 1.5424561403508772, + "grad_norm": 2.614776134490967, + "learning_rate": 3.9175074227029996e-05, + "loss": 0.4765, + "step": 10990 + }, + { + "epoch": 1.543859649122807, + "grad_norm": 2.1491498947143555, + "learning_rate": 3.915689865573454e-05, + "loss": 0.5748, + "step": 11000 + }, + { + "epoch": 1.545263157894737, + "grad_norm": 1.7609467506408691, + "learning_rate": 3.913871206280824e-05, + "loss": 0.7091, + "step": 11010 + }, + { + "epoch": 1.5466666666666666, + "grad_norm": 2.5683560371398926, + "learning_rate": 3.912051446241001e-05, + "loss": 0.507, + "step": 11020 + }, + { + "epoch": 1.5480701754385966, + "grad_norm": 1.3458114862442017, + "learning_rate": 3.910230586870729e-05, + "loss": 0.5738, + "step": 11030 + }, + { + "epoch": 1.5494736842105263, + "grad_norm": 1.7107462882995605, + "learning_rate": 3.90840862958761e-05, + "loss": 0.4437, + "step": 11040 + }, + { + "epoch": 1.550877192982456, + "grad_norm": 1.6637877225875854, + "learning_rate": 3.9065855758101e-05, + "loss": 0.4859, + "step": 11050 + }, + { + "epoch": 1.5522807017543858, + "grad_norm": 2.1268763542175293, + "learning_rate": 3.904761426957509e-05, + "loss": 0.5433, + "step": 11060 + }, + { + "epoch": 1.5536842105263158, + "grad_norm": 1.8485718965530396, + "learning_rate": 3.902936184449999e-05, + "loss": 0.5938, + "step": 11070 + }, + { + "epoch": 1.5550877192982457, + "grad_norm": 1.9369820356369019, + "learning_rate": 3.901109849708585e-05, + "loss": 0.4484, + "step": 11080 + }, + { + "epoch": 1.5564912280701755, + "grad_norm": 1.5223256349563599, + "learning_rate": 3.8992824241551295e-05, + "loss": 0.4353, + "step": 11090 + }, + { + "epoch": 1.5578947368421052, + "grad_norm": 1.2845451831817627, + "learning_rate": 3.897453909212348e-05, + "loss": 0.4497, + "step": 11100 + }, + { + "epoch": 1.559298245614035, + "grad_norm": 1.2751349210739136, + "learning_rate": 3.895624306303799e-05, + "loss": 0.4648, + "step": 11110 + }, + { + "epoch": 1.560701754385965, + "grad_norm": 1.6384958028793335, + "learning_rate": 3.893793616853894e-05, + "loss": 0.5921, + "step": 11120 + }, + { + "epoch": 1.5621052631578949, + "grad_norm": 1.817355990409851, + "learning_rate": 3.891961842287886e-05, + "loss": 0.4611, + "step": 11130 + }, + { + "epoch": 1.5635087719298246, + "grad_norm": 1.7115503549575806, + "learning_rate": 3.890128984031876e-05, + "loss": 0.4745, + "step": 11140 + }, + { + "epoch": 1.5649122807017544, + "grad_norm": 1.7166131734848022, + "learning_rate": 3.888295043512804e-05, + "loss": 0.5716, + "step": 11150 + }, + { + "epoch": 1.566315789473684, + "grad_norm": 1.8528428077697754, + "learning_rate": 3.886460022158458e-05, + "loss": 0.5193, + "step": 11160 + }, + { + "epoch": 1.567719298245614, + "grad_norm": 1.9985193014144897, + "learning_rate": 3.884623921397463e-05, + "loss": 0.4974, + "step": 11170 + }, + { + "epoch": 1.5691228070175438, + "grad_norm": 1.4072109460830688, + "learning_rate": 3.882786742659289e-05, + "loss": 0.4418, + "step": 11180 + }, + { + "epoch": 1.5705263157894738, + "grad_norm": 1.3553410768508911, + "learning_rate": 3.880948487374241e-05, + "loss": 0.5278, + "step": 11190 + }, + { + "epoch": 1.5719298245614035, + "grad_norm": 1.6441354751586914, + "learning_rate": 3.8791091569734625e-05, + "loss": 0.476, + "step": 11200 + }, + { + "epoch": 1.5733333333333333, + "grad_norm": 1.4078179597854614, + "learning_rate": 3.8772687528889385e-05, + "loss": 0.581, + "step": 11210 + }, + { + "epoch": 1.5747368421052632, + "grad_norm": 2.096179723739624, + "learning_rate": 3.875427276553485e-05, + "loss": 0.5076, + "step": 11220 + }, + { + "epoch": 1.576140350877193, + "grad_norm": 1.3418902158737183, + "learning_rate": 3.873584729400753e-05, + "loss": 0.5177, + "step": 11230 + }, + { + "epoch": 1.577543859649123, + "grad_norm": 2.1806328296661377, + "learning_rate": 3.8717411128652304e-05, + "loss": 0.5348, + "step": 11240 + }, + { + "epoch": 1.5789473684210527, + "grad_norm": 1.6312358379364014, + "learning_rate": 3.869896428382236e-05, + "loss": 0.4699, + "step": 11250 + }, + { + "epoch": 1.5803508771929824, + "grad_norm": 2.4073407649993896, + "learning_rate": 3.8680506773879184e-05, + "loss": 0.5403, + "step": 11260 + }, + { + "epoch": 1.5817543859649121, + "grad_norm": 2.2572133541107178, + "learning_rate": 3.8662038613192596e-05, + "loss": 0.4982, + "step": 11270 + }, + { + "epoch": 1.583157894736842, + "grad_norm": 1.9021124839782715, + "learning_rate": 3.8643559816140685e-05, + "loss": 0.5025, + "step": 11280 + }, + { + "epoch": 1.584561403508772, + "grad_norm": 1.5274001359939575, + "learning_rate": 3.862507039710982e-05, + "loss": 0.4716, + "step": 11290 + }, + { + "epoch": 1.5859649122807018, + "grad_norm": 1.0260459184646606, + "learning_rate": 3.860657037049466e-05, + "loss": 0.5378, + "step": 11300 + }, + { + "epoch": 1.5873684210526315, + "grad_norm": 2.2560298442840576, + "learning_rate": 3.85880597506981e-05, + "loss": 0.5017, + "step": 11310 + }, + { + "epoch": 1.5887719298245613, + "grad_norm": 1.3712495565414429, + "learning_rate": 3.856953855213131e-05, + "loss": 0.4612, + "step": 11320 + }, + { + "epoch": 1.5901754385964912, + "grad_norm": 1.3681050539016724, + "learning_rate": 3.855100678921365e-05, + "loss": 0.5077, + "step": 11330 + }, + { + "epoch": 1.5915789473684212, + "grad_norm": 2.0733203887939453, + "learning_rate": 3.8532464476372765e-05, + "loss": 0.5643, + "step": 11340 + }, + { + "epoch": 1.592982456140351, + "grad_norm": 2.131699323654175, + "learning_rate": 3.851391162804445e-05, + "loss": 0.4939, + "step": 11350 + }, + { + "epoch": 1.5943859649122807, + "grad_norm": 1.787874698638916, + "learning_rate": 3.849534825867275e-05, + "loss": 0.5191, + "step": 11360 + }, + { + "epoch": 1.5957894736842104, + "grad_norm": 1.7359322309494019, + "learning_rate": 3.847677438270988e-05, + "loss": 0.5361, + "step": 11370 + }, + { + "epoch": 1.5971929824561404, + "grad_norm": 1.7936285734176636, + "learning_rate": 3.845819001461625e-05, + "loss": 0.5005, + "step": 11380 + }, + { + "epoch": 1.5985964912280701, + "grad_norm": 0.9876174330711365, + "learning_rate": 3.8439595168860406e-05, + "loss": 0.491, + "step": 11390 + }, + { + "epoch": 1.6, + "grad_norm": 1.5902925729751587, + "learning_rate": 3.842098985991909e-05, + "loss": 0.5636, + "step": 11400 + }, + { + "epoch": 1.6014035087719298, + "grad_norm": 2.157257556915283, + "learning_rate": 3.840237410227717e-05, + "loss": 0.5482, + "step": 11410 + }, + { + "epoch": 1.6028070175438596, + "grad_norm": 2.4455907344818115, + "learning_rate": 3.838374791042764e-05, + "loss": 0.4854, + "step": 11420 + }, + { + "epoch": 1.6042105263157893, + "grad_norm": 2.3983774185180664, + "learning_rate": 3.8365111298871645e-05, + "loss": 0.5535, + "step": 11430 + }, + { + "epoch": 1.6056140350877193, + "grad_norm": 1.6250687837600708, + "learning_rate": 3.834646428211841e-05, + "loss": 0.5493, + "step": 11440 + }, + { + "epoch": 1.6070175438596492, + "grad_norm": 0.9640924334526062, + "learning_rate": 3.83278068746853e-05, + "loss": 0.5126, + "step": 11450 + }, + { + "epoch": 1.608421052631579, + "grad_norm": 1.3491884469985962, + "learning_rate": 3.830913909109772e-05, + "loss": 0.5692, + "step": 11460 + }, + { + "epoch": 1.6098245614035087, + "grad_norm": 1.2153112888336182, + "learning_rate": 3.8290460945889186e-05, + "loss": 0.4367, + "step": 11470 + }, + { + "epoch": 1.6112280701754385, + "grad_norm": 1.869314193725586, + "learning_rate": 3.827177245360129e-05, + "loss": 0.5275, + "step": 11480 + }, + { + "epoch": 1.6126315789473684, + "grad_norm": 2.1873159408569336, + "learning_rate": 3.825307362878364e-05, + "loss": 0.5663, + "step": 11490 + }, + { + "epoch": 1.6140350877192984, + "grad_norm": 1.1323574781417847, + "learning_rate": 3.823436448599393e-05, + "loss": 0.4986, + "step": 11500 + }, + { + "epoch": 1.6154385964912281, + "grad_norm": 1.424310326576233, + "learning_rate": 3.8215645039797874e-05, + "loss": 0.5401, + "step": 11510 + }, + { + "epoch": 1.6168421052631579, + "grad_norm": 1.8369444608688354, + "learning_rate": 3.8196915304769184e-05, + "loss": 0.534, + "step": 11520 + }, + { + "epoch": 1.6182456140350876, + "grad_norm": 1.404891848564148, + "learning_rate": 3.817817529548962e-05, + "loss": 0.457, + "step": 11530 + }, + { + "epoch": 1.6196491228070176, + "grad_norm": 1.5708239078521729, + "learning_rate": 3.815942502654889e-05, + "loss": 0.5023, + "step": 11540 + }, + { + "epoch": 1.6210526315789475, + "grad_norm": 1.4378736019134521, + "learning_rate": 3.8140664512544746e-05, + "loss": 0.4885, + "step": 11550 + }, + { + "epoch": 1.6224561403508773, + "grad_norm": 2.0573270320892334, + "learning_rate": 3.8121893768082896e-05, + "loss": 0.5204, + "step": 11560 + }, + { + "epoch": 1.623859649122807, + "grad_norm": 1.4034109115600586, + "learning_rate": 3.8103112807776986e-05, + "loss": 0.4611, + "step": 11570 + }, + { + "epoch": 1.6252631578947367, + "grad_norm": 1.8642430305480957, + "learning_rate": 3.8084321646248654e-05, + "loss": 0.4999, + "step": 11580 + }, + { + "epoch": 1.6266666666666667, + "grad_norm": 2.0132107734680176, + "learning_rate": 3.806552029812747e-05, + "loss": 0.5241, + "step": 11590 + }, + { + "epoch": 1.6280701754385964, + "grad_norm": 1.3874375820159912, + "learning_rate": 3.804670877805091e-05, + "loss": 0.5275, + "step": 11600 + }, + { + "epoch": 1.6294736842105264, + "grad_norm": 1.296034574508667, + "learning_rate": 3.802788710066439e-05, + "loss": 0.4517, + "step": 11610 + }, + { + "epoch": 1.6308771929824561, + "grad_norm": 1.7629979848861694, + "learning_rate": 3.800905528062123e-05, + "loss": 0.4437, + "step": 11620 + }, + { + "epoch": 1.6322807017543859, + "grad_norm": 1.788439393043518, + "learning_rate": 3.7990213332582665e-05, + "loss": 0.5334, + "step": 11630 + }, + { + "epoch": 1.6336842105263156, + "grad_norm": 1.6043287515640259, + "learning_rate": 3.7971361271217775e-05, + "loss": 0.5915, + "step": 11640 + }, + { + "epoch": 1.6350877192982456, + "grad_norm": 1.1127432584762573, + "learning_rate": 3.7952499111203544e-05, + "loss": 0.633, + "step": 11650 + }, + { + "epoch": 1.6364912280701756, + "grad_norm": 1.6562187671661377, + "learning_rate": 3.793362686722483e-05, + "loss": 0.523, + "step": 11660 + }, + { + "epoch": 1.6378947368421053, + "grad_norm": 2.0622971057891846, + "learning_rate": 3.7914744553974284e-05, + "loss": 0.5025, + "step": 11670 + }, + { + "epoch": 1.639298245614035, + "grad_norm": 2.0334134101867676, + "learning_rate": 3.789585218615246e-05, + "loss": 0.5153, + "step": 11680 + }, + { + "epoch": 1.6407017543859648, + "grad_norm": 1.7830958366394043, + "learning_rate": 3.787694977846771e-05, + "loss": 0.5783, + "step": 11690 + }, + { + "epoch": 1.6421052631578947, + "grad_norm": 2.2819151878356934, + "learning_rate": 3.78580373456362e-05, + "loss": 0.5333, + "step": 11700 + }, + { + "epoch": 1.6435087719298247, + "grad_norm": 1.7580994367599487, + "learning_rate": 3.783911490238191e-05, + "loss": 0.574, + "step": 11710 + }, + { + "epoch": 1.6449122807017544, + "grad_norm": 1.012489676475525, + "learning_rate": 3.782018246343661e-05, + "loss": 0.5028, + "step": 11720 + }, + { + "epoch": 1.6463157894736842, + "grad_norm": 0.9846917390823364, + "learning_rate": 3.780124004353987e-05, + "loss": 0.5425, + "step": 11730 + }, + { + "epoch": 1.647719298245614, + "grad_norm": 1.3875446319580078, + "learning_rate": 3.778228765743898e-05, + "loss": 0.4961, + "step": 11740 + }, + { + "epoch": 1.6491228070175439, + "grad_norm": 0.912992537021637, + "learning_rate": 3.776332531988903e-05, + "loss": 0.5135, + "step": 11750 + }, + { + "epoch": 1.6505263157894738, + "grad_norm": 1.4657293558120728, + "learning_rate": 3.774435304565288e-05, + "loss": 0.5917, + "step": 11760 + }, + { + "epoch": 1.6519298245614036, + "grad_norm": 1.4134496450424194, + "learning_rate": 3.772537084950106e-05, + "loss": 0.6529, + "step": 11770 + }, + { + "epoch": 1.6533333333333333, + "grad_norm": 1.0357835292816162, + "learning_rate": 3.770637874621189e-05, + "loss": 0.4853, + "step": 11780 + }, + { + "epoch": 1.654736842105263, + "grad_norm": 1.1166404485702515, + "learning_rate": 3.7687376750571347e-05, + "loss": 0.5509, + "step": 11790 + }, + { + "epoch": 1.656140350877193, + "grad_norm": 1.819243311882019, + "learning_rate": 3.7668364877373154e-05, + "loss": 0.5083, + "step": 11800 + }, + { + "epoch": 1.6575438596491228, + "grad_norm": 2.0793590545654297, + "learning_rate": 3.764934314141869e-05, + "loss": 0.5239, + "step": 11810 + }, + { + "epoch": 1.6589473684210527, + "grad_norm": 1.4497408866882324, + "learning_rate": 3.763031155751705e-05, + "loss": 0.5295, + "step": 11820 + }, + { + "epoch": 1.6603508771929825, + "grad_norm": 1.8401798009872437, + "learning_rate": 3.7611270140484956e-05, + "loss": 0.3987, + "step": 11830 + }, + { + "epoch": 1.6617543859649122, + "grad_norm": 1.0776817798614502, + "learning_rate": 3.759221890514681e-05, + "loss": 0.5236, + "step": 11840 + }, + { + "epoch": 1.663157894736842, + "grad_norm": 1.6151456832885742, + "learning_rate": 3.757315786633465e-05, + "loss": 0.4783, + "step": 11850 + }, + { + "epoch": 1.664561403508772, + "grad_norm": 2.149061679840088, + "learning_rate": 3.7554087038888155e-05, + "loss": 0.6304, + "step": 11860 + }, + { + "epoch": 1.6659649122807019, + "grad_norm": 1.848923683166504, + "learning_rate": 3.753500643765461e-05, + "loss": 0.4951, + "step": 11870 + }, + { + "epoch": 1.6673684210526316, + "grad_norm": 1.3279706239700317, + "learning_rate": 3.751591607748891e-05, + "loss": 0.5195, + "step": 11880 + }, + { + "epoch": 1.6687719298245614, + "grad_norm": 2.280778646469116, + "learning_rate": 3.749681597325357e-05, + "loss": 0.6116, + "step": 11890 + }, + { + "epoch": 1.670175438596491, + "grad_norm": 1.8069521188735962, + "learning_rate": 3.7477706139818683e-05, + "loss": 0.5038, + "step": 11900 + }, + { + "epoch": 1.671578947368421, + "grad_norm": 1.8922659158706665, + "learning_rate": 3.745858659206188e-05, + "loss": 0.5671, + "step": 11910 + }, + { + "epoch": 1.672982456140351, + "grad_norm": 1.6917041540145874, + "learning_rate": 3.743945734486841e-05, + "loss": 0.5559, + "step": 11920 + }, + { + "epoch": 1.6743859649122808, + "grad_norm": 1.2357120513916016, + "learning_rate": 3.742031841313103e-05, + "loss": 0.5069, + "step": 11930 + }, + { + "epoch": 1.6757894736842105, + "grad_norm": 1.5987924337387085, + "learning_rate": 3.7401169811750066e-05, + "loss": 0.5431, + "step": 11940 + }, + { + "epoch": 1.6771929824561402, + "grad_norm": 1.3475733995437622, + "learning_rate": 3.7382011555633365e-05, + "loss": 0.5636, + "step": 11950 + }, + { + "epoch": 1.6785964912280702, + "grad_norm": 1.4985164403915405, + "learning_rate": 3.736284365969627e-05, + "loss": 0.4871, + "step": 11960 + }, + { + "epoch": 1.6800000000000002, + "grad_norm": 1.2310782670974731, + "learning_rate": 3.7343666138861646e-05, + "loss": 0.6245, + "step": 11970 + }, + { + "epoch": 1.68140350877193, + "grad_norm": 1.7251943349838257, + "learning_rate": 3.7324479008059865e-05, + "loss": 0.5126, + "step": 11980 + }, + { + "epoch": 1.6828070175438596, + "grad_norm": 2.389265775680542, + "learning_rate": 3.7305282282228756e-05, + "loss": 0.5669, + "step": 11990 + }, + { + "epoch": 1.6842105263157894, + "grad_norm": 2.130988359451294, + "learning_rate": 3.728607597631363e-05, + "loss": 0.4796, + "step": 12000 + }, + { + "epoch": 1.6842105263157894, + "eval_loss": 0.6500447392463684, + "eval_runtime": 119.5925, + "eval_samples_per_second": 12.543, + "eval_steps_per_second": 3.136, + "step": 12000 + }, + { + "epoch": 1.6842105263157894, + "step": 12000, + "total_flos": 6.933368738955264e+17, + "train_loss": 0.6038338423768679, + "train_runtime": 5861.7175, + "train_samples_per_second": 24.31, + "train_steps_per_second": 6.078 + } + ], + "logging_steps": 10, + "max_steps": 35625, + "num_input_tokens_seen": 0, + "num_train_epochs": 5, + "save_steps": 2000, + "total_flos": 6.933368738955264e+17, + "train_batch_size": 4, + "trial_name": null, + "trial_params": null +} diff --git a/llama2_13b_peft/news_commentary_it/training_args.bin b/llama2_13b_peft/news_commentary_it/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..af167c750acab3aff76c7f4ec3428ac3da073b63 --- /dev/null +++ b/llama2_13b_peft/news_commentary_it/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c760325c4e915ed6add2ca1fa0f2456628f1a65e9e53ca6ae7e92088e8ec81d2 +size 5176 diff --git a/llama2_13b_peft/news_commentary_it/training_eval_loss.png b/llama2_13b_peft/news_commentary_it/training_eval_loss.png new file mode 100644 index 0000000000000000000000000000000000000000..a7edaa057756a37411e06f8d00aad99df3881f20 Binary files /dev/null and b/llama2_13b_peft/news_commentary_it/training_eval_loss.png differ diff --git a/llama2_13b_peft/news_commentary_it/training_loss.png b/llama2_13b_peft/news_commentary_it/training_loss.png new file mode 100644 index 0000000000000000000000000000000000000000..0cd71870d015d9281f5fc6e0b90202427f93b881 Binary files /dev/null and b/llama2_13b_peft/news_commentary_it/training_loss.png differ diff --git a/llama2_13b_peft/topical_chat/README.md b/llama2_13b_peft/topical_chat/README.md new file mode 100644 index 0000000000000000000000000000000000000000..d608fba36e56b9e62512321a94b466087309dd33 --- /dev/null +++ b/llama2_13b_peft/topical_chat/README.md @@ -0,0 +1,89 @@ +--- +license: other +library_name: peft +tags: +- llama-factory +- lora +- generated_from_trainer +base_model: /data1/model/llama2/meta-llama/Llama2-13b +model-index: +- name: topical_chat_no_sys + results: [] +--- + + + +# topical_chat_no_sys + +This model is a fine-tuned version of [/data1/model/llama2/meta-llama/Llama2-13b](https://huggingface.co//data1/model/llama2/meta-llama/Llama2-13b) on the topical_chat_no_sys dataset. +It achieves the following results on the evaluation set: +- Loss: 1.8941 + +## Model description + +More information needed + +## Intended uses & limitations + +More information needed + +## Training and evaluation data + +More information needed + +## Training procedure + +### Training hyperparameters + +The following hyperparameters were used during training: +- learning_rate: 5e-05 +- train_batch_size: 4 +- eval_batch_size: 4 +- seed: 42 +- distributed_type: multi-GPU +- num_devices: 2 +- total_train_batch_size: 8 +- total_eval_batch_size: 8 +- optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08 +- lr_scheduler_type: cosine +- lr_scheduler_warmup_steps: 20 +- num_epochs: 5.0 + +### Training results + +| Training Loss | Epoch | Step | Validation Loss | +|:-------------:|:------:|:----:|:---------------:| +| 2.1904 | 0.0472 | 100 | 2.1137 | +| 1.9627 | 0.0944 | 200 | 2.0589 | +| 2.0172 | 0.1416 | 300 | 2.0221 | +| 1.8965 | 0.1889 | 400 | 1.9968 | +| 1.9534 | 0.2361 | 500 | 1.9823 | +| 1.8621 | 0.2833 | 600 | 1.9679 | +| 1.9777 | 0.3305 | 700 | 1.9611 | +| 2.0865 | 0.3777 | 800 | 1.9544 | +| 1.9662 | 0.4249 | 900 | 1.9461 | +| 1.8352 | 0.4721 | 1000 | 1.9376 | +| 1.8973 | 0.5194 | 1100 | 1.9329 | +| 1.9688 | 0.5666 | 1200 | 1.9264 | +| 1.8383 | 0.6138 | 1300 | 1.9192 | +| 1.9032 | 0.6610 | 1400 | 1.9146 | +| 1.9295 | 0.7082 | 1500 | 1.9109 | +| 1.8207 | 0.7554 | 1600 | 1.9061 | +| 1.9119 | 0.8026 | 1700 | 1.9032 | +| 1.8392 | 0.8499 | 1800 | 1.9019 | +| 1.961 | 0.8971 | 1900 | 1.8994 | +| 1.8913 | 0.9443 | 2000 | 1.8945 | +| 1.8187 | 0.9915 | 2100 | 1.8941 | +| 1.7296 | 1.0387 | 2200 | 1.9006 | +| 1.6184 | 1.0859 | 2300 | 1.9040 | +| 1.6973 | 1.1331 | 2400 | 1.9056 | + + +### Framework versions + +- PEFT 0.10.0 +- Transformers 4.40.0 +- Pytorch 2.2.1 +- Datasets 2.18.0 +- Tokenizers 0.19.1 \ No newline at end of file diff --git a/llama2_13b_peft/topical_chat/adapter_config.json b/llama2_13b_peft/topical_chat/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..3993e35c744e2e9685fa9500a9626c4efa56cf55 --- /dev/null +++ b/llama2_13b_peft/topical_chat/adapter_config.json @@ -0,0 +1,34 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "/data1/model/llama2/meta-llama/Llama2-13b", + "bias": "none", + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 16, + "lora_dropout": 0.0, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "r": 8, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "v_proj", + "down_proj", + "q_proj", + "gate_proj", + "up_proj", + "k_proj", + "o_proj" + ], + "task_type": "CAUSAL_LM", + "use_dora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/llama2_13b_peft/topical_chat/adapter_model.safetensors b/llama2_13b_peft/topical_chat/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..b125471004322f3572d04d9bb04472e0b4818880 --- /dev/null +++ b/llama2_13b_peft/topical_chat/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3e6e2ffd0565e16b426d000f5f8d1e22505be002104c4426830c2fe30127625f +size 125248064 diff --git a/llama2_13b_peft/topical_chat/all_results.json b/llama2_13b_peft/topical_chat/all_results.json new file mode 100644 index 0000000000000000000000000000000000000000..3d7332eb24985a5ddbd3d1f4ed34bf225ee8ac64 --- /dev/null +++ b/llama2_13b_peft/topical_chat/all_results.json @@ -0,0 +1,12 @@ +{ + "epoch": 1.13314447592068, + "eval_loss": 1.8941270112991333, + "eval_runtime": 40.8751, + "eval_samples_per_second": 21.823, + "eval_steps_per_second": 2.74, + "total_flos": 9.512959383227597e+17, + "train_loss": 1.9100826263427735, + "train_runtime": 3885.2685, + "train_samples_per_second": 21.805, + "train_steps_per_second": 2.726 +} \ No newline at end of file diff --git a/llama2_13b_peft/topical_chat/eval_results.json b/llama2_13b_peft/topical_chat/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..24432d1fabce0bc2ff7d9840aee39adde74339c8 --- /dev/null +++ b/llama2_13b_peft/topical_chat/eval_results.json @@ -0,0 +1,7 @@ +{ + "epoch": 1.13314447592068, + "eval_loss": 1.8941270112991333, + "eval_runtime": 40.8751, + "eval_samples_per_second": 21.823, + "eval_steps_per_second": 2.74 +} \ No newline at end of file diff --git a/llama2_13b_peft/topical_chat/special_tokens_map.json b/llama2_13b_peft/topical_chat/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..72ecfeeb7e14d244c936169d2ed139eeae235ef1 --- /dev/null +++ b/llama2_13b_peft/topical_chat/special_tokens_map.json @@ -0,0 +1,24 @@ +{ + "bos_token": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": "", + "unk_token": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + } +} diff --git a/llama2_13b_peft/topical_chat/tokenizer.model b/llama2_13b_peft/topical_chat/tokenizer.model new file mode 100644 index 0000000000000000000000000000000000000000..6c00c742ce03c627d6cd5b795984876fa49fa899 --- /dev/null +++ b/llama2_13b_peft/topical_chat/tokenizer.model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9e556afd44213b6bd1be2b850ebbbd98f5481437a8021afaf58ee7fb1818d347 +size 499723 diff --git a/llama2_13b_peft/topical_chat/tokenizer_config.json b/llama2_13b_peft/topical_chat/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..a40266f39e5b5fed14de34710d35eb9e98d6bdad --- /dev/null +++ b/llama2_13b_peft/topical_chat/tokenizer_config.json @@ -0,0 +1,45 @@ +{ + "add_bos_token": true, + "add_eos_token": false, + "add_prefix_space": true, + "added_tokens_decoder": { + "0": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "1": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "2": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + } + }, + "bos_token": "", + "chat_template": "{% if messages[0]['role'] == 'system' %}{% set system_message = messages[0]['content'] %}{% endif %}{% if system_message is defined %}{{ system_message }}{% endif %}{% for message in messages %}{% set content = message['content'] %}{% if message['role'] == 'user' %}{{ content }}{% elif message['role'] == 'assistant' %}{{ content + '\\n' }}{% endif %}{% endfor %}", + "clean_up_tokenization_spaces": false, + "eos_token": "", + "legacy": true, + "model_max_length": 1000000000000000019884624838656, + "pad_token": "", + "padding_side": "right", + "sp_model_kwargs": {}, + "spaces_between_special_tokens": false, + "split_special_tokens": false, + "tokenizer_class": "LlamaTokenizer", + "unk_token": "", + "use_default_system_prompt": false +} diff --git a/llama2_13b_peft/topical_chat/train_results.json b/llama2_13b_peft/topical_chat/train_results.json new file mode 100644 index 0000000000000000000000000000000000000000..ffed294e18da55058edb77a4d1f6e18d56ffd866 --- /dev/null +++ b/llama2_13b_peft/topical_chat/train_results.json @@ -0,0 +1,8 @@ +{ + "epoch": 1.13314447592068, + "total_flos": 9.512959383227597e+17, + "train_loss": 1.9100826263427735, + "train_runtime": 3885.2685, + "train_samples_per_second": 21.805, + "train_steps_per_second": 2.726 +} \ No newline at end of file diff --git a/llama2_13b_peft/topical_chat/trainer_log.jsonl b/llama2_13b_peft/topical_chat/trainer_log.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..b4c7d7c3fa1fcc9079ad6d3359ae9b12dca8ec95 --- /dev/null +++ b/llama2_13b_peft/topical_chat/trainer_log.jsonl @@ -0,0 +1,266 @@ +{"current_steps": 10, "total_steps": 10590, "loss": 2.8563, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 2.5e-05, "epoch": 0.004721435316336166, "percentage": 0.09, "elapsed_time": "0:00:13", "remaining_time": "3:54:01"} +{"current_steps": 20, "total_steps": 10590, "loss": 2.6853, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 5e-05, "epoch": 0.009442870632672332, "percentage": 0.19, "elapsed_time": "0:00:25", "remaining_time": "3:42:39"} +{"current_steps": 30, "total_steps": 10590, "loss": 2.1764, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.999988957701981e-05, "epoch": 0.014164305949008499, "percentage": 0.28, "elapsed_time": "0:00:37", "remaining_time": "3:37:08"} +{"current_steps": 40, "total_steps": 10590, "loss": 2.15, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.99995583090547e-05, "epoch": 0.018885741265344664, "percentage": 0.38, "elapsed_time": "0:00:49", "remaining_time": "3:35:35"} +{"current_steps": 50, "total_steps": 10590, "loss": 2.1033, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.999900619903104e-05, "epoch": 0.023607176581680833, "percentage": 0.47, "elapsed_time": "0:01:00", "remaining_time": "3:32:38"} +{"current_steps": 60, "total_steps": 10590, "loss": 2.1417, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.999823325182607e-05, "epoch": 0.028328611898016998, "percentage": 0.57, "elapsed_time": "0:01:11", "remaining_time": "3:29:28"} +{"current_steps": 70, "total_steps": 10590, "loss": 2.1031, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.9997239474267886e-05, "epoch": 0.033050047214353166, "percentage": 0.66, "elapsed_time": "0:01:22", "remaining_time": "3:26:19"} +{"current_steps": 80, "total_steps": 10590, "loss": 1.9967, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.9996024875135365e-05, "epoch": 0.03777148253068933, "percentage": 0.76, "elapsed_time": "0:01:34", "remaining_time": "3:26:30"} +{"current_steps": 90, "total_steps": 10590, "loss": 2.1262, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.999458946515808e-05, "epoch": 0.042492917847025496, "percentage": 0.85, "elapsed_time": "0:01:46", "remaining_time": "3:27:40"} +{"current_steps": 100, "total_steps": 10590, "loss": 2.1904, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.9992933257016194e-05, "epoch": 0.047214353163361665, "percentage": 0.94, "elapsed_time": "0:01:58", "remaining_time": "3:27:57"} +{"current_steps": 100, "total_steps": 10590, "loss": null, "eval_loss": 2.113694906234741, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": null, "epoch": 0.047214353163361665, "percentage": 0.94, "elapsed_time": "0:01:58", "remaining_time": "3:27:57"} +{"current_steps": 110, "total_steps": 10590, "loss": 2.062, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.99910562653404e-05, "epoch": 0.05193578847969783, "percentage": 1.04, "elapsed_time": "0:02:52", "remaining_time": "4:34:02"} +{"current_steps": 120, "total_steps": 10590, "loss": 1.9974, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.9988958506711735e-05, "epoch": 0.056657223796033995, "percentage": 1.13, "elapsed_time": "0:03:04", "remaining_time": "4:28:26"} +{"current_steps": 130, "total_steps": 10590, "loss": 2.124, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.9986639999661454e-05, "epoch": 0.061378659112370164, "percentage": 1.23, "elapsed_time": "0:03:16", "remaining_time": "4:24:08"} +{"current_steps": 140, "total_steps": 10590, "loss": 1.9903, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.998410076467088e-05, "epoch": 0.06610009442870633, "percentage": 1.32, "elapsed_time": "0:03:28", "remaining_time": "4:19:49"} +{"current_steps": 150, "total_steps": 10590, "loss": 2.1054, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.9981340824171194e-05, "epoch": 0.0708215297450425, "percentage": 1.42, "elapsed_time": "0:03:41", "remaining_time": "4:16:35"} +{"current_steps": 160, "total_steps": 10590, "loss": 2.0274, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.997836020254328e-05, "epoch": 0.07554296506137866, "percentage": 1.51, "elapsed_time": "0:03:52", "remaining_time": "4:12:35"} +{"current_steps": 170, "total_steps": 10590, "loss": 2.1484, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.997515892611746e-05, "epoch": 0.08026440037771483, "percentage": 1.61, "elapsed_time": "0:04:05", "remaining_time": "4:10:40"} +{"current_steps": 180, "total_steps": 10590, "loss": 1.9468, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.9971737023173286e-05, "epoch": 0.08498583569405099, "percentage": 1.7, "elapsed_time": "0:04:16", "remaining_time": "4:07:23"} +{"current_steps": 190, "total_steps": 10590, "loss": 2.0637, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.996809452393931e-05, "epoch": 0.08970727101038715, "percentage": 1.79, "elapsed_time": "0:04:29", "remaining_time": "4:05:27"} +{"current_steps": 200, "total_steps": 10590, "loss": 1.9627, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.996423146059277e-05, "epoch": 0.09442870632672333, "percentage": 1.89, "elapsed_time": "0:04:40", "remaining_time": "4:02:51"} +{"current_steps": 200, "total_steps": 10590, "loss": null, "eval_loss": 2.058908462524414, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": null, "epoch": 0.09442870632672333, "percentage": 1.89, "elapsed_time": "0:04:40", "remaining_time": "4:02:51"} +{"current_steps": 210, "total_steps": 10590, "loss": 1.9765, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.996014786725935e-05, "epoch": 0.09915014164305949, "percentage": 1.98, "elapsed_time": "0:05:35", "remaining_time": "4:36:00"} +{"current_steps": 220, "total_steps": 10590, "loss": 2.0651, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.9955843780012846e-05, "epoch": 0.10387157695939565, "percentage": 2.08, "elapsed_time": "0:05:45", "remaining_time": "4:31:31"} +{"current_steps": 230, "total_steps": 10590, "loss": 1.9729, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.995131923687488e-05, "epoch": 0.10859301227573183, "percentage": 2.17, "elapsed_time": "0:05:56", "remaining_time": "4:27:34"} +{"current_steps": 240, "total_steps": 10590, "loss": 2.0653, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.994657427781453e-05, "epoch": 0.11331444759206799, "percentage": 2.27, "elapsed_time": "0:06:08", "remaining_time": "4:25:01"} +{"current_steps": 250, "total_steps": 10590, "loss": 2.185, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.994160894474799e-05, "epoch": 0.11803588290840415, "percentage": 2.36, "elapsed_time": "0:06:21", "remaining_time": "4:22:49"} +{"current_steps": 260, "total_steps": 10590, "loss": 2.0668, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.993642328153822e-05, "epoch": 0.12275731822474033, "percentage": 2.46, "elapsed_time": "0:06:33", "remaining_time": "4:20:17"} +{"current_steps": 270, "total_steps": 10590, "loss": 2.0643, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.993101733399453e-05, "epoch": 0.1274787535410765, "percentage": 2.55, "elapsed_time": "0:06:44", "remaining_time": "4:17:34"} +{"current_steps": 280, "total_steps": 10590, "loss": 2.0303, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.9925391149872184e-05, "epoch": 0.13220018885741266, "percentage": 2.64, "elapsed_time": "0:06:56", "remaining_time": "4:15:18"} +{"current_steps": 290, "total_steps": 10590, "loss": 1.9509, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.9919544778871985e-05, "epoch": 0.1369216241737488, "percentage": 2.74, "elapsed_time": "0:07:07", "remaining_time": "4:13:21"} +{"current_steps": 300, "total_steps": 10590, "loss": 2.0172, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.991347827263982e-05, "epoch": 0.141643059490085, "percentage": 2.83, "elapsed_time": "0:07:19", "remaining_time": "4:11:22"} +{"current_steps": 300, "total_steps": 10590, "loss": null, "eval_loss": 2.0220935344696045, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": null, "epoch": 0.141643059490085, "percentage": 2.83, "elapsed_time": "0:07:19", "remaining_time": "4:11:22"} +{"current_steps": 310, "total_steps": 10590, "loss": 1.9862, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.990719168476625e-05, "epoch": 0.14636449480642116, "percentage": 2.93, "elapsed_time": "0:08:13", "remaining_time": "4:32:50"} +{"current_steps": 320, "total_steps": 10590, "loss": 1.9686, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.990068507078595e-05, "epoch": 0.1510859301227573, "percentage": 3.02, "elapsed_time": "0:08:25", "remaining_time": "4:30:13"} +{"current_steps": 330, "total_steps": 10590, "loss": 1.9751, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.989395848817731e-05, "epoch": 0.1558073654390935, "percentage": 3.12, "elapsed_time": "0:08:38", "remaining_time": "4:28:26"} +{"current_steps": 340, "total_steps": 10590, "loss": 1.9844, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.988701199636186e-05, "epoch": 0.16052880075542966, "percentage": 3.21, "elapsed_time": "0:08:50", "remaining_time": "4:26:18"} +{"current_steps": 350, "total_steps": 10590, "loss": 2.1042, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.987984565670382e-05, "epoch": 0.1652502360717658, "percentage": 3.31, "elapsed_time": "0:09:02", "remaining_time": "4:24:33"} +{"current_steps": 360, "total_steps": 10590, "loss": 1.9839, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.9872459532509436e-05, "epoch": 0.16997167138810199, "percentage": 3.4, "elapsed_time": "0:09:14", "remaining_time": "4:22:46"} +{"current_steps": 370, "total_steps": 10590, "loss": 1.9492, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.9864853689026556e-05, "epoch": 0.17469310670443816, "percentage": 3.49, "elapsed_time": "0:09:26", "remaining_time": "4:20:52"} +{"current_steps": 380, "total_steps": 10590, "loss": 1.9942, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.985702819344397e-05, "epoch": 0.1794145420207743, "percentage": 3.59, "elapsed_time": "0:09:38", "remaining_time": "4:18:53"} +{"current_steps": 390, "total_steps": 10590, "loss": 1.9091, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.984898311489085e-05, "epoch": 0.18413597733711048, "percentage": 3.68, "elapsed_time": "0:09:50", "remaining_time": "4:17:26"} +{"current_steps": 400, "total_steps": 10590, "loss": 1.8965, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.98407185244361e-05, "epoch": 0.18885741265344666, "percentage": 3.78, "elapsed_time": "0:10:02", "remaining_time": "4:15:44"} +{"current_steps": 400, "total_steps": 10590, "loss": null, "eval_loss": 1.9968496561050415, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": null, "epoch": 0.18885741265344666, "percentage": 3.78, "elapsed_time": "0:10:02", "remaining_time": "4:15:44"} +{"current_steps": 410, "total_steps": 10590, "loss": 1.8476, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.983223449508779e-05, "epoch": 0.1935788479697828, "percentage": 3.87, "elapsed_time": "0:10:56", "remaining_time": "4:31:34"} +{"current_steps": 420, "total_steps": 10590, "loss": 1.9005, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.982353110179246e-05, "epoch": 0.19830028328611898, "percentage": 3.97, "elapsed_time": "0:11:08", "remaining_time": "4:29:42"} +{"current_steps": 430, "total_steps": 10590, "loss": 1.9098, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.9814608421434485e-05, "epoch": 0.20302171860245516, "percentage": 4.06, "elapsed_time": "0:11:20", "remaining_time": "4:28:09"} +{"current_steps": 440, "total_steps": 10590, "loss": 1.912, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.9805466532835376e-05, "epoch": 0.2077431539187913, "percentage": 4.15, "elapsed_time": "0:11:33", "remaining_time": "4:26:40"} +{"current_steps": 450, "total_steps": 10590, "loss": 2.044, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.979610551675311e-05, "epoch": 0.21246458923512748, "percentage": 4.25, "elapsed_time": "0:11:45", "remaining_time": "4:24:56"} +{"current_steps": 460, "total_steps": 10590, "loss": 2.0439, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.978652545588137e-05, "epoch": 0.21718602455146366, "percentage": 4.34, "elapsed_time": "0:11:58", "remaining_time": "4:23:31"} +{"current_steps": 470, "total_steps": 10590, "loss": 2.017, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.977672643484889e-05, "epoch": 0.2219074598677998, "percentage": 4.44, "elapsed_time": "0:12:09", "remaining_time": "4:21:48"} +{"current_steps": 480, "total_steps": 10590, "loss": 1.9652, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.976670854021862e-05, "epoch": 0.22662889518413598, "percentage": 4.53, "elapsed_time": "0:12:20", "remaining_time": "4:20:04"} +{"current_steps": 490, "total_steps": 10590, "loss": 1.9621, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.975647186048703e-05, "epoch": 0.23135033050047216, "percentage": 4.63, "elapsed_time": "0:12:32", "remaining_time": "4:18:28"} +{"current_steps": 500, "total_steps": 10590, "loss": 1.9534, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.97460164860833e-05, "epoch": 0.2360717658168083, "percentage": 4.72, "elapsed_time": "0:12:44", "remaining_time": "4:17:03"} +{"current_steps": 500, "total_steps": 10590, "loss": null, "eval_loss": 1.982257604598999, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": null, "epoch": 0.2360717658168083, "percentage": 4.72, "elapsed_time": "0:12:44", "remaining_time": "4:17:03"} +{"current_steps": 510, "total_steps": 10590, "loss": 1.9625, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.973534250936851e-05, "epoch": 0.24079320113314448, "percentage": 4.82, "elapsed_time": "0:13:36", "remaining_time": "4:28:58"} +{"current_steps": 520, "total_steps": 10590, "loss": 1.9728, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.972445002463485e-05, "epoch": 0.24551463644948066, "percentage": 4.91, "elapsed_time": "0:13:49", "remaining_time": "4:27:40"} +{"current_steps": 530, "total_steps": 10590, "loss": 1.9505, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.971333912810476e-05, "epoch": 0.2502360717658168, "percentage": 5.0, "elapsed_time": "0:13:59", "remaining_time": "4:25:32"} +{"current_steps": 540, "total_steps": 10590, "loss": 1.9735, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.970200991793012e-05, "epoch": 0.254957507082153, "percentage": 5.1, "elapsed_time": "0:14:11", "remaining_time": "4:24:04"} +{"current_steps": 550, "total_steps": 10590, "loss": 1.9265, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.969046249419133e-05, "epoch": 0.25967894239848915, "percentage": 5.19, "elapsed_time": "0:14:23", "remaining_time": "4:22:38"} +{"current_steps": 560, "total_steps": 10590, "loss": 1.9292, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.967869695889647e-05, "epoch": 0.26440037771482533, "percentage": 5.29, "elapsed_time": "0:14:35", "remaining_time": "4:21:13"} +{"current_steps": 570, "total_steps": 10590, "loss": 1.9518, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.966671341598037e-05, "epoch": 0.26912181303116145, "percentage": 5.38, "elapsed_time": "0:14:47", "remaining_time": "4:19:54"} +{"current_steps": 580, "total_steps": 10590, "loss": 1.9796, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.965451197130373e-05, "epoch": 0.2738432483474976, "percentage": 5.48, "elapsed_time": "0:14:58", "remaining_time": "4:18:33"} +{"current_steps": 590, "total_steps": 10590, "loss": 1.9494, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.964209273265212e-05, "epoch": 0.2785646836638338, "percentage": 5.57, "elapsed_time": "0:15:10", "remaining_time": "4:17:06"} +{"current_steps": 600, "total_steps": 10590, "loss": 1.8621, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.9629455809735105e-05, "epoch": 0.28328611898017, "percentage": 5.67, "elapsed_time": "0:15:21", "remaining_time": "4:15:50"} +{"current_steps": 600, "total_steps": 10590, "loss": null, "eval_loss": 1.9678794145584106, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": null, "epoch": 0.28328611898017, "percentage": 5.67, "elapsed_time": "0:15:21", "remaining_time": "4:15:50"} +{"current_steps": 610, "total_steps": 10590, "loss": 1.8698, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.9616601314185206e-05, "epoch": 0.28800755429650615, "percentage": 5.76, "elapsed_time": "0:16:16", "remaining_time": "4:26:11"} +{"current_steps": 620, "total_steps": 10590, "loss": 1.9089, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.9603529359556975e-05, "epoch": 0.2927289896128423, "percentage": 5.85, "elapsed_time": "0:16:27", "remaining_time": "4:24:44"} +{"current_steps": 630, "total_steps": 10590, "loss": 2.026, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.959024006132593e-05, "epoch": 0.29745042492917845, "percentage": 5.95, "elapsed_time": "0:16:39", "remaining_time": "4:23:21"} +{"current_steps": 640, "total_steps": 10590, "loss": 1.9059, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.95767335368876e-05, "epoch": 0.3021718602455146, "percentage": 6.04, "elapsed_time": "0:16:51", "remaining_time": "4:22:09"} +{"current_steps": 650, "total_steps": 10590, "loss": 1.973, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.956300990555643e-05, "epoch": 0.3068932955618508, "percentage": 6.14, "elapsed_time": "0:17:03", "remaining_time": "4:20:53"} +{"current_steps": 660, "total_steps": 10590, "loss": 1.9296, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.954906928856476e-05, "epoch": 0.311614730878187, "percentage": 6.23, "elapsed_time": "0:17:16", "remaining_time": "4:19:59"} +{"current_steps": 670, "total_steps": 10590, "loss": 1.9321, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.953491180906175e-05, "epoch": 0.31633616619452315, "percentage": 6.33, "elapsed_time": "0:17:28", "remaining_time": "4:18:43"} +{"current_steps": 680, "total_steps": 10590, "loss": 2.0338, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.952053759211229e-05, "epoch": 0.3210576015108593, "percentage": 6.42, "elapsed_time": "0:17:40", "remaining_time": "4:17:42"} +{"current_steps": 690, "total_steps": 10590, "loss": 1.9328, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.9505946764695885e-05, "epoch": 0.32577903682719545, "percentage": 6.52, "elapsed_time": "0:17:53", "remaining_time": "4:16:37"} +{"current_steps": 700, "total_steps": 10590, "loss": 1.9777, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.949113945570555e-05, "epoch": 0.3305004721435316, "percentage": 6.61, "elapsed_time": "0:18:04", "remaining_time": "4:15:15"} +{"current_steps": 700, "total_steps": 10590, "loss": null, "eval_loss": 1.9610685110092163, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": null, "epoch": 0.3305004721435316, "percentage": 6.61, "elapsed_time": "0:18:04", "remaining_time": "4:15:15"} +{"current_steps": 710, "total_steps": 10590, "loss": 2.0113, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.947611579594666e-05, "epoch": 0.3352219074598678, "percentage": 6.7, "elapsed_time": "0:18:58", "remaining_time": "4:24:04"} +{"current_steps": 720, "total_steps": 10590, "loss": 1.9097, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.9460875918135804e-05, "epoch": 0.33994334277620397, "percentage": 6.8, "elapsed_time": "0:19:10", "remaining_time": "4:22:50"} +{"current_steps": 730, "total_steps": 10590, "loss": 1.9226, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.944541995689958e-05, "epoch": 0.34466477809254015, "percentage": 6.89, "elapsed_time": "0:19:22", "remaining_time": "4:21:45"} +{"current_steps": 740, "total_steps": 10590, "loss": 1.9583, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.9429748048773475e-05, "epoch": 0.3493862134088763, "percentage": 6.99, "elapsed_time": "0:19:35", "remaining_time": "4:20:52"} +{"current_steps": 750, "total_steps": 10590, "loss": 1.9101, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.941386033220058e-05, "epoch": 0.35410764872521244, "percentage": 7.08, "elapsed_time": "0:19:47", "remaining_time": "4:19:39"} +{"current_steps": 760, "total_steps": 10590, "loss": 1.9238, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.9397756947530414e-05, "epoch": 0.3588290840415486, "percentage": 7.18, "elapsed_time": "0:20:00", "remaining_time": "4:18:52"} +{"current_steps": 770, "total_steps": 10590, "loss": 1.867, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.938143803701769e-05, "epoch": 0.3635505193578848, "percentage": 7.27, "elapsed_time": "0:20:11", "remaining_time": "4:17:36"} +{"current_steps": 780, "total_steps": 10590, "loss": 1.9233, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.9364903744821014e-05, "epoch": 0.36827195467422097, "percentage": 7.37, "elapsed_time": "0:20:23", "remaining_time": "4:16:30"} +{"current_steps": 790, "total_steps": 10590, "loss": 1.8599, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.934815421700165e-05, "epoch": 0.37299338999055714, "percentage": 7.46, "elapsed_time": "0:20:35", "remaining_time": "4:15:22"} +{"current_steps": 800, "total_steps": 10590, "loss": 2.0865, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.933118960152222e-05, "epoch": 0.3777148253068933, "percentage": 7.55, "elapsed_time": "0:20:47", "remaining_time": "4:14:26"} +{"current_steps": 800, "total_steps": 10590, "loss": null, "eval_loss": 1.954448938369751, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": null, "epoch": 0.3777148253068933, "percentage": 7.55, "elapsed_time": "0:20:47", "remaining_time": "4:14:26"} +{"current_steps": 810, "total_steps": 10590, "loss": 1.9036, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.931401004824541e-05, "epoch": 0.38243626062322944, "percentage": 7.65, "elapsed_time": "0:21:40", "remaining_time": "4:21:46"} +{"current_steps": 820, "total_steps": 10590, "loss": 1.92, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.92966157089326e-05, "epoch": 0.3871576959395656, "percentage": 7.74, "elapsed_time": "0:21:51", "remaining_time": "4:20:23"} +{"current_steps": 830, "total_steps": 10590, "loss": 1.9894, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.927900673724259e-05, "epoch": 0.3918791312559018, "percentage": 7.84, "elapsed_time": "0:22:04", "remaining_time": "4:19:29"} +{"current_steps": 840, "total_steps": 10590, "loss": 1.9978, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.9261183288730176e-05, "epoch": 0.39660056657223797, "percentage": 7.93, "elapsed_time": "0:22:15", "remaining_time": "4:18:26"} +{"current_steps": 850, "total_steps": 10590, "loss": 1.8262, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.9243145520844834e-05, "epoch": 0.40132200188857414, "percentage": 8.03, "elapsed_time": "0:22:27", "remaining_time": "4:17:20"} +{"current_steps": 860, "total_steps": 10590, "loss": 1.907, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.9224893592929275e-05, "epoch": 0.4060434372049103, "percentage": 8.12, "elapsed_time": "0:22:39", "remaining_time": "4:16:24"} +{"current_steps": 870, "total_steps": 10590, "loss": 1.9766, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.92064276662181e-05, "epoch": 0.41076487252124644, "percentage": 8.22, "elapsed_time": "0:22:52", "remaining_time": "4:15:30"} +{"current_steps": 880, "total_steps": 10590, "loss": 1.9038, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.9187747903836303e-05, "epoch": 0.4154863078375826, "percentage": 8.31, "elapsed_time": "0:23:04", "remaining_time": "4:14:37"} +{"current_steps": 890, "total_steps": 10590, "loss": 1.9733, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.9168854470797904e-05, "epoch": 0.4202077431539188, "percentage": 8.4, "elapsed_time": "0:23:16", "remaining_time": "4:13:45"} +{"current_steps": 900, "total_steps": 10590, "loss": 1.9662, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.914974753400443e-05, "epoch": 0.42492917847025496, "percentage": 8.5, "elapsed_time": "0:23:28", "remaining_time": "4:12:44"} +{"current_steps": 900, "total_steps": 10590, "loss": null, "eval_loss": 1.946061372756958, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": null, "epoch": 0.42492917847025496, "percentage": 8.5, "elapsed_time": "0:23:28", "remaining_time": "4:12:44"} +{"current_steps": 910, "total_steps": 10590, "loss": 2.0209, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.913042726224347e-05, "epoch": 0.42965061378659114, "percentage": 8.59, "elapsed_time": "0:24:23", "remaining_time": "4:19:27"} +{"current_steps": 920, "total_steps": 10590, "loss": 1.8343, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.911089382618718e-05, "epoch": 0.4343720491029273, "percentage": 8.69, "elapsed_time": "0:24:36", "remaining_time": "4:18:34"} +{"current_steps": 930, "total_steps": 10590, "loss": 1.952, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.909114739839079e-05, "epoch": 0.43909348441926344, "percentage": 8.78, "elapsed_time": "0:24:48", "remaining_time": "4:17:42"} +{"current_steps": 940, "total_steps": 10590, "loss": 1.9064, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.907118815329104e-05, "epoch": 0.4438149197355996, "percentage": 8.88, "elapsed_time": "0:25:00", "remaining_time": "4:16:48"} +{"current_steps": 950, "total_steps": 10590, "loss": 1.883, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.905101626720469e-05, "epoch": 0.4485363550519358, "percentage": 8.97, "elapsed_time": "0:25:12", "remaining_time": "4:15:48"} +{"current_steps": 960, "total_steps": 10590, "loss": 1.8716, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.903063191832691e-05, "epoch": 0.45325779036827196, "percentage": 9.07, "elapsed_time": "0:25:26", "remaining_time": "4:15:15"} +{"current_steps": 970, "total_steps": 10590, "loss": 1.9534, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.901003528672975e-05, "epoch": 0.45797922568460814, "percentage": 9.16, "elapsed_time": "0:25:40", "remaining_time": "4:14:33"} +{"current_steps": 980, "total_steps": 10590, "loss": 1.8552, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.898922655436052e-05, "epoch": 0.4627006610009443, "percentage": 9.25, "elapsed_time": "0:25:50", "remaining_time": "4:13:20"} +{"current_steps": 990, "total_steps": 10590, "loss": 1.974, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.8968205905040207e-05, "epoch": 0.46742209631728043, "percentage": 9.35, "elapsed_time": "0:26:02", "remaining_time": "4:12:29"} +{"current_steps": 1000, "total_steps": 10590, "loss": 1.8352, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.894697352446182e-05, "epoch": 0.4721435316336166, "percentage": 9.44, "elapsed_time": "0:26:14", "remaining_time": "4:11:36"} +{"current_steps": 1000, "total_steps": 10590, "loss": null, "eval_loss": 1.9375569820404053, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": null, "epoch": 0.4721435316336166, "percentage": 9.44, "elapsed_time": "0:26:14", "remaining_time": "4:11:36"} +{"current_steps": 1010, "total_steps": 10590, "loss": 1.9139, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.8925529600188794e-05, "epoch": 0.4768649669499528, "percentage": 9.54, "elapsed_time": "0:27:08", "remaining_time": "4:17:27"} +{"current_steps": 1020, "total_steps": 10590, "loss": 1.8118, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.8903874321653274e-05, "epoch": 0.48158640226628896, "percentage": 9.63, "elapsed_time": "0:27:19", "remaining_time": "4:16:23"} +{"current_steps": 1030, "total_steps": 10590, "loss": 1.9924, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.88820078801545e-05, "epoch": 0.48630783758262514, "percentage": 9.73, "elapsed_time": "0:27:31", "remaining_time": "4:15:25"} +{"current_steps": 1040, "total_steps": 10590, "loss": 1.8814, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.885993046885708e-05, "epoch": 0.4910292728989613, "percentage": 9.82, "elapsed_time": "0:27:43", "remaining_time": "4:14:38"} +{"current_steps": 1050, "total_steps": 10590, "loss": 1.8928, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.883764228278931e-05, "epoch": 0.49575070821529743, "percentage": 9.92, "elapsed_time": "0:27:55", "remaining_time": "4:13:46"} +{"current_steps": 1060, "total_steps": 10590, "loss": 1.9071, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.881514351884141e-05, "epoch": 0.5004721435316336, "percentage": 10.01, "elapsed_time": "0:28:08", "remaining_time": "4:13:00"} +{"current_steps": 1070, "total_steps": 10590, "loss": 1.939, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.879243437576383e-05, "epoch": 0.5051935788479698, "percentage": 10.1, "elapsed_time": "0:28:21", "remaining_time": "4:12:18"} +{"current_steps": 1080, "total_steps": 10590, "loss": 1.8603, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.876951505416547e-05, "epoch": 0.509915014164306, "percentage": 10.2, "elapsed_time": "0:28:32", "remaining_time": "4:11:20"} +{"current_steps": 1090, "total_steps": 10590, "loss": 1.86, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.8746385756511915e-05, "epoch": 0.5146364494806421, "percentage": 10.29, "elapsed_time": "0:28:44", "remaining_time": "4:10:29"} +{"current_steps": 1100, "total_steps": 10590, "loss": 1.8973, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.872304668712364e-05, "epoch": 0.5193578847969783, "percentage": 10.39, "elapsed_time": "0:28:55", "remaining_time": "4:09:32"} +{"current_steps": 1100, "total_steps": 10590, "loss": null, "eval_loss": 1.932855248451233, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": null, "epoch": 0.5193578847969783, "percentage": 10.39, "elapsed_time": "0:28:55", "remaining_time": "4:09:32"} +{"current_steps": 1110, "total_steps": 10590, "loss": 1.9125, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.8699498052174205e-05, "epoch": 0.5240793201133145, "percentage": 10.48, "elapsed_time": "0:29:49", "remaining_time": "4:14:44"} +{"current_steps": 1120, "total_steps": 10590, "loss": 2.001, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.867574005968847e-05, "epoch": 0.5288007554296507, "percentage": 10.58, "elapsed_time": "0:30:02", "remaining_time": "4:14:04"} +{"current_steps": 1130, "total_steps": 10590, "loss": 1.8442, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.8651772919540686e-05, "epoch": 0.5335221907459868, "percentage": 10.67, "elapsed_time": "0:30:16", "remaining_time": "4:13:26"} +{"current_steps": 1140, "total_steps": 10590, "loss": 1.9932, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.862759684345269e-05, "epoch": 0.5382436260623229, "percentage": 10.76, "elapsed_time": "0:30:28", "remaining_time": "4:12:33"} +{"current_steps": 1150, "total_steps": 10590, "loss": 1.8388, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.860321204499205e-05, "epoch": 0.5429650613786591, "percentage": 10.86, "elapsed_time": "0:30:39", "remaining_time": "4:11:43"} +{"current_steps": 1160, "total_steps": 10590, "loss": 1.7975, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.857861873957011e-05, "epoch": 0.5476864966949953, "percentage": 10.95, "elapsed_time": "0:30:52", "remaining_time": "4:10:59"} +{"current_steps": 1170, "total_steps": 10590, "loss": 1.848, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.855381714444018e-05, "epoch": 0.5524079320113314, "percentage": 11.05, "elapsed_time": "0:31:04", "remaining_time": "4:10:14"} +{"current_steps": 1180, "total_steps": 10590, "loss": 1.9311, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.8528807478695535e-05, "epoch": 0.5571293673276676, "percentage": 11.14, "elapsed_time": "0:31:16", "remaining_time": "4:09:26"} +{"current_steps": 1190, "total_steps": 10590, "loss": 1.8705, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.850358996326753e-05, "epoch": 0.5618508026440038, "percentage": 11.24, "elapsed_time": "0:31:28", "remaining_time": "4:08:38"} +{"current_steps": 1200, "total_steps": 10590, "loss": 1.9688, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.84781648209236e-05, "epoch": 0.56657223796034, "percentage": 11.33, "elapsed_time": "0:31:41", "remaining_time": "4:07:56"} +{"current_steps": 1200, "total_steps": 10590, "loss": null, "eval_loss": 1.9264005422592163, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": null, "epoch": 0.56657223796034, "percentage": 11.33, "elapsed_time": "0:31:41", "remaining_time": "4:07:56"} +{"current_steps": 1210, "total_steps": 10590, "loss": 1.9038, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.8452532276265364e-05, "epoch": 0.5712936732766761, "percentage": 11.43, "elapsed_time": "0:32:33", "remaining_time": "4:12:25"} +{"current_steps": 1220, "total_steps": 10590, "loss": 1.9581, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.842669255572656e-05, "epoch": 0.5760151085930123, "percentage": 11.52, "elapsed_time": "0:32:45", "remaining_time": "4:11:34"} +{"current_steps": 1230, "total_steps": 10590, "loss": 1.8913, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.8400645887571126e-05, "epoch": 0.5807365439093485, "percentage": 11.61, "elapsed_time": "0:32:57", "remaining_time": "4:10:51"} +{"current_steps": 1240, "total_steps": 10590, "loss": 1.8597, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.837439250189111e-05, "epoch": 0.5854579792256847, "percentage": 11.71, "elapsed_time": "0:33:10", "remaining_time": "4:10:08"} +{"current_steps": 1250, "total_steps": 10590, "loss": 1.9072, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.834793263060468e-05, "epoch": 0.5901794145420207, "percentage": 11.8, "elapsed_time": "0:33:23", "remaining_time": "4:09:30"} +{"current_steps": 1260, "total_steps": 10590, "loss": 1.9781, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.832126650745405e-05, "epoch": 0.5949008498583569, "percentage": 11.9, "elapsed_time": "0:33:35", "remaining_time": "4:08:47"} +{"current_steps": 1270, "total_steps": 10590, "loss": 1.885, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.829439436800346e-05, "epoch": 0.5996222851746931, "percentage": 11.99, "elapsed_time": "0:33:46", "remaining_time": "4:07:53"} +{"current_steps": 1280, "total_steps": 10590, "loss": 1.8891, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.8267316449637054e-05, "epoch": 0.6043437204910292, "percentage": 12.09, "elapsed_time": "0:33:59", "remaining_time": "4:07:12"} +{"current_steps": 1290, "total_steps": 10590, "loss": 1.8654, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.8240032991556765e-05, "epoch": 0.6090651558073654, "percentage": 12.18, "elapsed_time": "0:34:10", "remaining_time": "4:06:21"} +{"current_steps": 1300, "total_steps": 10590, "loss": 1.8383, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.821254423478027e-05, "epoch": 0.6137865911237016, "percentage": 12.28, "elapsed_time": "0:34:22", "remaining_time": "4:05:38"} +{"current_steps": 1300, "total_steps": 10590, "loss": null, "eval_loss": 1.9191977977752686, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": null, "epoch": 0.6137865911237016, "percentage": 12.28, "elapsed_time": "0:34:22", "remaining_time": "4:05:38"} +{"current_steps": 1310, "total_steps": 10590, "loss": 1.8538, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.8184850422138795e-05, "epoch": 0.6185080264400378, "percentage": 12.37, "elapsed_time": "0:35:15", "remaining_time": "4:09:44"} +{"current_steps": 1320, "total_steps": 10590, "loss": 1.9403, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.815695179827502e-05, "epoch": 0.623229461756374, "percentage": 12.46, "elapsed_time": "0:35:27", "remaining_time": "4:08:58"} +{"current_steps": 1330, "total_steps": 10590, "loss": 1.8925, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.812884860964086e-05, "epoch": 0.6279508970727101, "percentage": 12.56, "elapsed_time": "0:35:38", "remaining_time": "4:08:08"} +{"current_steps": 1340, "total_steps": 10590, "loss": 1.8849, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.8100541104495355e-05, "epoch": 0.6326723323890463, "percentage": 12.65, "elapsed_time": "0:35:51", "remaining_time": "4:07:34"} +{"current_steps": 1350, "total_steps": 10590, "loss": 1.8733, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.8072029532902426e-05, "epoch": 0.6373937677053825, "percentage": 12.75, "elapsed_time": "0:36:03", "remaining_time": "4:06:48"} +{"current_steps": 1360, "total_steps": 10590, "loss": 1.9357, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.8043314146728705e-05, "epoch": 0.6421152030217187, "percentage": 12.84, "elapsed_time": "0:36:16", "remaining_time": "4:06:11"} +{"current_steps": 1370, "total_steps": 10590, "loss": 1.7913, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.8014395199641246e-05, "epoch": 0.6468366383380547, "percentage": 12.94, "elapsed_time": "0:36:27", "remaining_time": "4:05:22"} +{"current_steps": 1380, "total_steps": 10590, "loss": 1.9151, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.798527294710538e-05, "epoch": 0.6515580736543909, "percentage": 13.03, "elapsed_time": "0:36:40", "remaining_time": "4:04:42"} +{"current_steps": 1390, "total_steps": 10590, "loss": 1.9297, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.795594764638237e-05, "epoch": 0.6562795089707271, "percentage": 13.13, "elapsed_time": "0:36:52", "remaining_time": "4:04:01"} +{"current_steps": 1400, "total_steps": 10590, "loss": 1.9032, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.792641955652718e-05, "epoch": 0.6610009442870632, "percentage": 13.22, "elapsed_time": "0:37:04", "remaining_time": "4:03:24"} +{"current_steps": 1400, "total_steps": 10590, "loss": null, "eval_loss": 1.914588212966919, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": null, "epoch": 0.6610009442870632, "percentage": 13.22, "elapsed_time": "0:37:04", "remaining_time": "4:03:24"} +{"current_steps": 1410, "total_steps": 10590, "loss": 1.9032, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.7896688938386195e-05, "epoch": 0.6657223796033994, "percentage": 13.31, "elapsed_time": "0:37:59", "remaining_time": "4:07:22"} +{"current_steps": 1420, "total_steps": 10590, "loss": 1.8854, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.786675605459487e-05, "epoch": 0.6704438149197356, "percentage": 13.41, "elapsed_time": "0:38:10", "remaining_time": "4:06:31"} +{"current_steps": 1430, "total_steps": 10590, "loss": 1.8865, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.7836621169575494e-05, "epoch": 0.6751652502360718, "percentage": 13.5, "elapsed_time": "0:38:23", "remaining_time": "4:05:52"} +{"current_steps": 1440, "total_steps": 10590, "loss": 1.7515, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.7806284549534755e-05, "epoch": 0.6798866855524079, "percentage": 13.6, "elapsed_time": "0:38:35", "remaining_time": "4:05:10"} +{"current_steps": 1450, "total_steps": 10590, "loss": 1.9624, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.7775746462461446e-05, "epoch": 0.6846081208687441, "percentage": 13.69, "elapsed_time": "0:38:47", "remaining_time": "4:04:30"} +{"current_steps": 1460, "total_steps": 10590, "loss": 1.7875, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.7745007178124114e-05, "epoch": 0.6893295561850803, "percentage": 13.79, "elapsed_time": "0:38:59", "remaining_time": "4:03:51"} +{"current_steps": 1470, "total_steps": 10590, "loss": 1.8984, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.771406696806861e-05, "epoch": 0.6940509915014165, "percentage": 13.88, "elapsed_time": "0:39:11", "remaining_time": "4:03:08"} +{"current_steps": 1480, "total_steps": 10590, "loss": 1.8594, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.7682926105615754e-05, "epoch": 0.6987724268177526, "percentage": 13.98, "elapsed_time": "0:39:23", "remaining_time": "4:02:26"} +{"current_steps": 1490, "total_steps": 10590, "loss": 1.8484, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.76515848658589e-05, "epoch": 0.7034938621340887, "percentage": 14.07, "elapsed_time": "0:39:35", "remaining_time": "4:01:48"} +{"current_steps": 1500, "total_steps": 10590, "loss": 1.9295, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.76200435256615e-05, "epoch": 0.7082152974504249, "percentage": 14.16, "elapsed_time": "0:39:47", "remaining_time": "4:01:05"} +{"current_steps": 1500, "total_steps": 10590, "loss": null, "eval_loss": 1.9108749628067017, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": null, "epoch": 0.7082152974504249, "percentage": 14.16, "elapsed_time": "0:39:47", "remaining_time": "4:01:05"} +{"current_steps": 1510, "total_steps": 10590, "loss": 1.8586, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.758830236365465e-05, "epoch": 0.7129367327667611, "percentage": 14.26, "elapsed_time": "0:40:40", "remaining_time": "4:04:36"} +{"current_steps": 1520, "total_steps": 10590, "loss": 1.9794, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.7556361660234634e-05, "epoch": 0.7176581680830972, "percentage": 14.35, "elapsed_time": "0:40:52", "remaining_time": "4:03:55"} +{"current_steps": 1530, "total_steps": 10590, "loss": 1.9122, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.752422169756048e-05, "epoch": 0.7223796033994334, "percentage": 14.45, "elapsed_time": "0:41:05", "remaining_time": "4:03:16"} +{"current_steps": 1540, "total_steps": 10590, "loss": 1.9197, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.749188275955143e-05, "epoch": 0.7271010387157696, "percentage": 14.54, "elapsed_time": "0:41:16", "remaining_time": "4:02:33"} +{"current_steps": 1550, "total_steps": 10590, "loss": 1.8548, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.745934513188442e-05, "epoch": 0.7318224740321058, "percentage": 14.64, "elapsed_time": "0:41:29", "remaining_time": "4:02:00"} +{"current_steps": 1560, "total_steps": 10590, "loss": 1.8857, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.7426609101991605e-05, "epoch": 0.7365439093484419, "percentage": 14.73, "elapsed_time": "0:41:41", "remaining_time": "4:01:17"} +{"current_steps": 1570, "total_steps": 10590, "loss": 1.876, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.739367495905778e-05, "epoch": 0.7412653446647781, "percentage": 14.83, "elapsed_time": "0:41:52", "remaining_time": "4:00:37"} +{"current_steps": 1580, "total_steps": 10590, "loss": 1.9355, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.736054299401785e-05, "epoch": 0.7459867799811143, "percentage": 14.92, "elapsed_time": "0:42:05", "remaining_time": "3:59:59"} +{"current_steps": 1590, "total_steps": 10590, "loss": 1.9286, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.7327213499554234e-05, "epoch": 0.7507082152974505, "percentage": 15.01, "elapsed_time": "0:42:16", "remaining_time": "3:59:16"} +{"current_steps": 1600, "total_steps": 10590, "loss": 1.8207, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.7293686770094294e-05, "epoch": 0.7554296506137866, "percentage": 15.11, "elapsed_time": "0:42:27", "remaining_time": "3:58:32"} +{"current_steps": 1600, "total_steps": 10590, "loss": null, "eval_loss": 1.906082034111023, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": null, "epoch": 0.7554296506137866, "percentage": 15.11, "elapsed_time": "0:42:27", "remaining_time": "3:58:32"} +{"current_steps": 1610, "total_steps": 10590, "loss": 1.9245, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.725996310180776e-05, "epoch": 0.7601510859301227, "percentage": 15.2, "elapsed_time": "0:43:21", "remaining_time": "4:01:50"} +{"current_steps": 1620, "total_steps": 10590, "loss": 1.8556, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.7226042792604046e-05, "epoch": 0.7648725212464589, "percentage": 15.3, "elapsed_time": "0:43:32", "remaining_time": "4:01:07"} +{"current_steps": 1630, "total_steps": 10590, "loss": 1.9757, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.719192614212969e-05, "epoch": 0.7695939565627951, "percentage": 15.39, "elapsed_time": "0:43:44", "remaining_time": "4:00:28"} +{"current_steps": 1640, "total_steps": 10590, "loss": 2.0371, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.7157613451765686e-05, "epoch": 0.7743153918791312, "percentage": 15.49, "elapsed_time": "0:43:56", "remaining_time": "3:59:48"} +{"current_steps": 1650, "total_steps": 10590, "loss": 1.8646, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.7123105024624776e-05, "epoch": 0.7790368271954674, "percentage": 15.58, "elapsed_time": "0:44:08", "remaining_time": "3:59:09"} +{"current_steps": 1660, "total_steps": 10590, "loss": 1.8383, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.708840116554883e-05, "epoch": 0.7837582625118036, "percentage": 15.68, "elapsed_time": "0:44:21", "remaining_time": "3:58:35"} +{"current_steps": 1670, "total_steps": 10590, "loss": 1.9008, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.7053502181106145e-05, "epoch": 0.7884796978281398, "percentage": 15.77, "elapsed_time": "0:44:33", "remaining_time": "3:57:59"} +{"current_steps": 1680, "total_steps": 10590, "loss": 1.8378, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.70184083795887e-05, "epoch": 0.7932011331444759, "percentage": 15.86, "elapsed_time": "0:44:44", "remaining_time": "3:57:18"} +{"current_steps": 1690, "total_steps": 10590, "loss": 1.8468, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.698312007100947e-05, "epoch": 0.7979225684608121, "percentage": 15.96, "elapsed_time": "0:44:56", "remaining_time": "3:56:40"} +{"current_steps": 1700, "total_steps": 10590, "loss": 1.9119, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.694763756709967e-05, "epoch": 0.8026440037771483, "percentage": 16.05, "elapsed_time": "0:45:07", "remaining_time": "3:55:57"} +{"current_steps": 1700, "total_steps": 10590, "loss": null, "eval_loss": 1.9032281637191772, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": null, "epoch": 0.8026440037771483, "percentage": 16.05, "elapsed_time": "0:45:07", "remaining_time": "3:55:57"} +{"current_steps": 1710, "total_steps": 10590, "loss": 1.9081, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.691196118130601e-05, "epoch": 0.8073654390934845, "percentage": 16.15, "elapsed_time": "0:45:59", "remaining_time": "3:58:52"} +{"current_steps": 1720, "total_steps": 10590, "loss": 1.8604, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.687609122878791e-05, "epoch": 0.8120868744098206, "percentage": 16.24, "elapsed_time": "0:46:10", "remaining_time": "3:58:07"} +{"current_steps": 1730, "total_steps": 10590, "loss": 1.7843, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.6840028026414745e-05, "epoch": 0.8168083097261567, "percentage": 16.34, "elapsed_time": "0:46:22", "remaining_time": "3:57:32"} +{"current_steps": 1740, "total_steps": 10590, "loss": 1.8666, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.6803771892763004e-05, "epoch": 0.8215297450424929, "percentage": 16.43, "elapsed_time": "0:46:33", "remaining_time": "3:56:49"} +{"current_steps": 1750, "total_steps": 10590, "loss": 1.8538, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.676732314811353e-05, "epoch": 0.826251180358829, "percentage": 16.53, "elapsed_time": "0:46:46", "remaining_time": "3:56:15"} +{"current_steps": 1760, "total_steps": 10590, "loss": 1.8492, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.673068211444862e-05, "epoch": 0.8309726156751652, "percentage": 16.62, "elapsed_time": "0:46:57", "remaining_time": "3:55:35"} +{"current_steps": 1770, "total_steps": 10590, "loss": 1.8554, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.669384911544927e-05, "epoch": 0.8356940509915014, "percentage": 16.71, "elapsed_time": "0:47:08", "remaining_time": "3:54:56"} +{"current_steps": 1780, "total_steps": 10590, "loss": 1.9333, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.665682447649222e-05, "epoch": 0.8404154863078376, "percentage": 16.81, "elapsed_time": "0:47:21", "remaining_time": "3:54:24"} +{"current_steps": 1790, "total_steps": 10590, "loss": 1.7886, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.661960852464717e-05, "epoch": 0.8451369216241738, "percentage": 16.9, "elapsed_time": "0:47:33", "remaining_time": "3:53:47"} +{"current_steps": 1800, "total_steps": 10590, "loss": 1.8392, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.6582201588673816e-05, "epoch": 0.8498583569405099, "percentage": 17.0, "elapsed_time": "0:47:45", "remaining_time": "3:53:13"} +{"current_steps": 1800, "total_steps": 10590, "loss": null, "eval_loss": 1.9019125699996948, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": null, "epoch": 0.8498583569405099, "percentage": 17.0, "elapsed_time": "0:47:45", "remaining_time": "3:53:13"} +{"current_steps": 1810, "total_steps": 10590, "loss": 1.8178, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.6544603999018966e-05, "epoch": 0.8545797922568461, "percentage": 17.09, "elapsed_time": "0:48:39", "remaining_time": "3:56:01"} +{"current_steps": 1820, "total_steps": 10590, "loss": 1.9773, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.6506816087813685e-05, "epoch": 0.8593012275731823, "percentage": 17.19, "elapsed_time": "0:48:52", "remaining_time": "3:55:30"} +{"current_steps": 1830, "total_steps": 10590, "loss": 1.7574, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.646883818887025e-05, "epoch": 0.8640226628895185, "percentage": 17.28, "elapsed_time": "0:49:05", "remaining_time": "3:55:01"} +{"current_steps": 1840, "total_steps": 10590, "loss": 1.9249, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.6430670637679295e-05, "epoch": 0.8687440982058546, "percentage": 17.37, "elapsed_time": "0:49:18", "remaining_time": "3:54:28"} +{"current_steps": 1850, "total_steps": 10590, "loss": 1.8666, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.63923137714068e-05, "epoch": 0.8734655335221907, "percentage": 17.47, "elapsed_time": "0:49:30", "remaining_time": "3:53:53"} +{"current_steps": 1860, "total_steps": 10590, "loss": 1.8948, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.635376792889111e-05, "epoch": 0.8781869688385269, "percentage": 17.56, "elapsed_time": "0:49:42", "remaining_time": "3:53:18"} +{"current_steps": 1870, "total_steps": 10590, "loss": 1.8183, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.6315033450639996e-05, "epoch": 0.882908404154863, "percentage": 17.66, "elapsed_time": "0:49:54", "remaining_time": "3:52:43"} +{"current_steps": 1880, "total_steps": 10590, "loss": 1.8918, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.6276110678827555e-05, "epoch": 0.8876298394711992, "percentage": 17.75, "elapsed_time": "0:50:06", "remaining_time": "3:52:09"} +{"current_steps": 1890, "total_steps": 10590, "loss": 1.8303, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.6236999957291275e-05, "epoch": 0.8923512747875354, "percentage": 17.85, "elapsed_time": "0:50:18", "remaining_time": "3:51:32"} +{"current_steps": 1900, "total_steps": 10590, "loss": 1.961, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.619770163152896e-05, "epoch": 0.8970727101038716, "percentage": 17.94, "elapsed_time": "0:50:30", "remaining_time": "3:51:00"} +{"current_steps": 1900, "total_steps": 10590, "loss": null, "eval_loss": 1.8994309902191162, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": null, "epoch": 0.8970727101038716, "percentage": 17.94, "elapsed_time": "0:50:30", "remaining_time": "3:51:00"} +{"current_steps": 1910, "total_steps": 10590, "loss": 1.8132, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.615821604869564e-05, "epoch": 0.9017941454202077, "percentage": 18.04, "elapsed_time": "0:51:24", "remaining_time": "3:53:35"} +{"current_steps": 1920, "total_steps": 10590, "loss": 1.8418, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.61185435576006e-05, "epoch": 0.9065155807365439, "percentage": 18.13, "elapsed_time": "0:51:35", "remaining_time": "3:53:00"} +{"current_steps": 1930, "total_steps": 10590, "loss": 1.8774, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.607868450870421e-05, "epoch": 0.9112370160528801, "percentage": 18.22, "elapsed_time": "0:51:48", "remaining_time": "3:52:26"} +{"current_steps": 1940, "total_steps": 10590, "loss": 1.9419, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.6038639254114855e-05, "epoch": 0.9159584513692163, "percentage": 18.32, "elapsed_time": "0:51:59", "remaining_time": "3:51:48"} +{"current_steps": 1950, "total_steps": 10590, "loss": 2.0166, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.599840814758587e-05, "epoch": 0.9206798866855525, "percentage": 18.41, "elapsed_time": "0:52:13", "remaining_time": "3:51:21"} +{"current_steps": 1960, "total_steps": 10590, "loss": 1.9425, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.5957991544512316e-05, "epoch": 0.9254013220018886, "percentage": 18.51, "elapsed_time": "0:52:24", "remaining_time": "3:50:46"} +{"current_steps": 1970, "total_steps": 10590, "loss": 1.8323, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.591738980192796e-05, "epoch": 0.9301227573182247, "percentage": 18.6, "elapsed_time": "0:52:36", "remaining_time": "3:50:12"} +{"current_steps": 1980, "total_steps": 10590, "loss": 1.8055, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.587660327850203e-05, "epoch": 0.9348441926345609, "percentage": 18.7, "elapsed_time": "0:52:47", "remaining_time": "3:49:34"} +{"current_steps": 1990, "total_steps": 10590, "loss": 1.7541, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.583563233453607e-05, "epoch": 0.939565627950897, "percentage": 18.79, "elapsed_time": "0:53:01", "remaining_time": "3:49:08"} +{"current_steps": 2000, "total_steps": 10590, "loss": 1.8913, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.579447733196079e-05, "epoch": 0.9442870632672332, "percentage": 18.89, "elapsed_time": "0:53:13", "remaining_time": "3:48:37"} +{"current_steps": 2000, "total_steps": 10590, "loss": null, "eval_loss": 1.8945337533950806, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": null, "epoch": 0.9442870632672332, "percentage": 18.89, "elapsed_time": "0:53:13", "remaining_time": "3:48:37"} +{"current_steps": 2010, "total_steps": 10590, "loss": 1.8698, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.5753138634332835e-05, "epoch": 0.9490084985835694, "percentage": 18.98, "elapsed_time": "0:54:07", "remaining_time": "3:51:01"} +{"current_steps": 2020, "total_steps": 10590, "loss": 1.893, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.5711616606831576e-05, "epoch": 0.9537299338999056, "percentage": 19.07, "elapsed_time": "0:54:19", "remaining_time": "3:50:26"} +{"current_steps": 2030, "total_steps": 10590, "loss": 1.965, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.566991161625589e-05, "epoch": 0.9584513692162417, "percentage": 19.17, "elapsed_time": "0:54:29", "remaining_time": "3:49:48"} +{"current_steps": 2040, "total_steps": 10590, "loss": 1.7615, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.562802403102093e-05, "epoch": 0.9631728045325779, "percentage": 19.26, "elapsed_time": "0:54:40", "remaining_time": "3:49:10"} +{"current_steps": 2050, "total_steps": 10590, "loss": 1.8079, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.5585954221154856e-05, "epoch": 0.9678942398489141, "percentage": 19.36, "elapsed_time": "0:54:52", "remaining_time": "3:48:35"} +{"current_steps": 2060, "total_steps": 10590, "loss": 1.8026, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.554370255829558e-05, "epoch": 0.9726156751652503, "percentage": 19.45, "elapsed_time": "0:55:04", "remaining_time": "3:48:01"} +{"current_steps": 2070, "total_steps": 10590, "loss": 1.8584, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.550126941568744e-05, "epoch": 0.9773371104815864, "percentage": 19.55, "elapsed_time": "0:55:16", "remaining_time": "3:47:30"} +{"current_steps": 2080, "total_steps": 10590, "loss": 1.7369, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.5458655168177974e-05, "epoch": 0.9820585457979226, "percentage": 19.64, "elapsed_time": "0:55:29", "remaining_time": "3:47:00"} +{"current_steps": 2090, "total_steps": 10590, "loss": 1.8483, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.541586019221457e-05, "epoch": 0.9867799811142587, "percentage": 19.74, "elapsed_time": "0:55:42", "remaining_time": "3:46:34"} +{"current_steps": 2100, "total_steps": 10590, "loss": 1.8187, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.5372884865841114e-05, "epoch": 0.9915014164305949, "percentage": 19.83, "elapsed_time": "0:55:54", "remaining_time": "3:46:02"} +{"current_steps": 2100, "total_steps": 10590, "loss": null, "eval_loss": 1.8941270112991333, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": null, "epoch": 0.9915014164305949, "percentage": 19.83, "elapsed_time": "0:55:54", "remaining_time": "3:46:02"} +{"current_steps": 2110, "total_steps": 10590, "loss": 1.9258, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.532972956869471e-05, "epoch": 0.996222851746931, "percentage": 19.92, "elapsed_time": "0:56:48", "remaining_time": "3:48:16"} +{"current_steps": 2120, "total_steps": 10590, "loss": 1.8346, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.528639468200226e-05, "epoch": 1.0009442870632672, "percentage": 20.02, "elapsed_time": "0:56:59", "remaining_time": "3:47:42"} +{"current_steps": 2130, "total_steps": 10590, "loss": 1.7275, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.524288058857717e-05, "epoch": 1.0056657223796035, "percentage": 20.11, "elapsed_time": "0:57:11", "remaining_time": "3:47:10"} +{"current_steps": 2140, "total_steps": 10590, "loss": 1.6719, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.51991876728159e-05, "epoch": 1.0103871576959396, "percentage": 20.21, "elapsed_time": "0:57:25", "remaining_time": "3:46:44"} +{"current_steps": 2150, "total_steps": 10590, "loss": 1.8067, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.515531632069461e-05, "epoch": 1.0151085930122756, "percentage": 20.3, "elapsed_time": "0:57:37", "remaining_time": "3:46:11"} +{"current_steps": 2160, "total_steps": 10590, "loss": 1.7311, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.511126691976574e-05, "epoch": 1.019830028328612, "percentage": 20.4, "elapsed_time": "0:57:50", "remaining_time": "3:45:42"} +{"current_steps": 2170, "total_steps": 10590, "loss": 1.8171, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.506703985915457e-05, "epoch": 1.024551463644948, "percentage": 20.49, "elapsed_time": "0:58:01", "remaining_time": "3:45:07"} +{"current_steps": 2180, "total_steps": 10590, "loss": 1.6716, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.502263552955581e-05, "epoch": 1.0292728989612843, "percentage": 20.59, "elapsed_time": "0:58:13", "remaining_time": "3:44:37"} +{"current_steps": 2190, "total_steps": 10590, "loss": 1.7456, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.497805432323015e-05, "epoch": 1.0339943342776203, "percentage": 20.68, "elapsed_time": "0:58:24", "remaining_time": "3:44:03"} +{"current_steps": 2200, "total_steps": 10590, "loss": 1.7296, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.4933296634000734e-05, "epoch": 1.0387157695939566, "percentage": 20.77, "elapsed_time": "0:58:36", "remaining_time": "3:43:30"} +{"current_steps": 2200, "total_steps": 10590, "loss": null, "eval_loss": 1.9005860090255737, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": null, "epoch": 1.0387157695939566, "percentage": 20.77, "elapsed_time": "0:58:36", "remaining_time": "3:43:30"} +{"current_steps": 2210, "total_steps": 10590, "loss": 1.7202, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.4888362857249775e-05, "epoch": 1.0434372049102927, "percentage": 20.87, "elapsed_time": "0:59:29", "remaining_time": "3:45:36"} +{"current_steps": 2220, "total_steps": 10590, "loss": 1.7879, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.484325338991499e-05, "epoch": 1.048158640226629, "percentage": 20.96, "elapsed_time": "0:59:40", "remaining_time": "3:45:00"} +{"current_steps": 2230, "total_steps": 10590, "loss": 1.7202, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.4797968630486135e-05, "epoch": 1.052880075542965, "percentage": 21.06, "elapsed_time": "0:59:52", "remaining_time": "3:44:28"} +{"current_steps": 2240, "total_steps": 10590, "loss": 1.6839, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.475250897900144e-05, "epoch": 1.0576015108593013, "percentage": 21.15, "elapsed_time": "1:00:04", "remaining_time": "3:43:55"} +{"current_steps": 2250, "total_steps": 10590, "loss": 1.7942, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.470687483704413e-05, "epoch": 1.0623229461756374, "percentage": 21.25, "elapsed_time": "1:00:16", "remaining_time": "3:43:23"} +{"current_steps": 2260, "total_steps": 10590, "loss": 1.7255, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.466106660773885e-05, "epoch": 1.0670443814919737, "percentage": 21.34, "elapsed_time": "1:00:27", "remaining_time": "3:42:51"} +{"current_steps": 2270, "total_steps": 10590, "loss": 1.6271, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.4615084695748074e-05, "epoch": 1.0717658168083097, "percentage": 21.44, "elapsed_time": "1:00:39", "remaining_time": "3:42:21"} +{"current_steps": 2280, "total_steps": 10590, "loss": 1.7444, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.456892950726861e-05, "epoch": 1.0764872521246458, "percentage": 21.53, "elapsed_time": "1:00:51", "remaining_time": "3:41:47"} +{"current_steps": 2290, "total_steps": 10590, "loss": 1.7943, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.452260145002791e-05, "epoch": 1.081208687440982, "percentage": 21.62, "elapsed_time": "1:01:03", "remaining_time": "3:41:16"} +{"current_steps": 2300, "total_steps": 10590, "loss": 1.6184, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.447610093328056e-05, "epoch": 1.0859301227573182, "percentage": 21.72, "elapsed_time": "1:01:15", "remaining_time": "3:40:46"} +{"current_steps": 2300, "total_steps": 10590, "loss": null, "eval_loss": 1.9040113687515259, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": null, "epoch": 1.0859301227573182, "percentage": 21.72, "elapsed_time": "1:01:15", "remaining_time": "3:40:46"} +{"current_steps": 2310, "total_steps": 10590, "loss": 1.7606, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.4429428367804605e-05, "epoch": 1.0906515580736544, "percentage": 21.81, "elapsed_time": "1:02:08", "remaining_time": "3:42:45"} +{"current_steps": 2320, "total_steps": 10590, "loss": 1.7419, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.438258416589794e-05, "epoch": 1.0953729933899905, "percentage": 21.91, "elapsed_time": "1:02:21", "remaining_time": "3:42:16"} +{"current_steps": 2330, "total_steps": 10590, "loss": 1.6251, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.43355687413747e-05, "epoch": 1.1000944287063268, "percentage": 22.0, "elapsed_time": "1:02:32", "remaining_time": "3:41:44"} +{"current_steps": 2340, "total_steps": 10590, "loss": 1.7576, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.428838250956153e-05, "epoch": 1.1048158640226629, "percentage": 22.1, "elapsed_time": "1:02:46", "remaining_time": "3:41:18"} +{"current_steps": 2350, "total_steps": 10590, "loss": 1.758, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.4241025887293976e-05, "epoch": 1.1095372993389991, "percentage": 22.19, "elapsed_time": "1:02:58", "remaining_time": "3:40:47"} +{"current_steps": 2360, "total_steps": 10590, "loss": 1.759, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.419349929291279e-05, "epoch": 1.1142587346553352, "percentage": 22.29, "elapsed_time": "1:03:10", "remaining_time": "3:40:19"} +{"current_steps": 2370, "total_steps": 10590, "loss": 1.7568, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.414580314626023e-05, "epoch": 1.1189801699716715, "percentage": 22.38, "elapsed_time": "1:03:23", "remaining_time": "3:39:51"} +{"current_steps": 2380, "total_steps": 10590, "loss": 1.684, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.4097937868676345e-05, "epoch": 1.1237016052880076, "percentage": 22.47, "elapsed_time": "1:03:34", "remaining_time": "3:39:20"} +{"current_steps": 2390, "total_steps": 10590, "loss": 1.6816, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.404990388299527e-05, "epoch": 1.1284230406043436, "percentage": 22.57, "elapsed_time": "1:03:46", "remaining_time": "3:38:50"} +{"current_steps": 2400, "total_steps": 10590, "loss": 1.6973, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.4001701613541456e-05, "epoch": 1.13314447592068, "percentage": 22.66, "elapsed_time": "1:03:59", "remaining_time": "3:38:21"} +{"current_steps": 2400, "total_steps": 10590, "loss": null, "eval_loss": 1.9056047201156616, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": null, "epoch": 1.13314447592068, "percentage": 22.66, "elapsed_time": "1:03:59", "remaining_time": "3:38:21"} +{"current_steps": 2400, "total_steps": 10590, "loss": null, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": null, "epoch": 1.13314447592068, "percentage": 22.66, "elapsed_time": "1:03:59", "remaining_time": "3:38:21"} +{"current_steps": 112, "total_steps": 112, "loss": null, "eval_loss": 1.8941270112991333, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": null, "epoch": 1.13314447592068, "percentage": 100.0, "elapsed_time": "1:05:30", "remaining_time": "0:00:00"} diff --git a/llama2_13b_peft/topical_chat/trainer_state.json b/llama2_13b_peft/topical_chat/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..0ac5c19fcd44d9de7cbdcac1a3a93337c44a76d3 --- /dev/null +++ b/llama2_13b_peft/topical_chat/trainer_state.json @@ -0,0 +1,1902 @@ +{ + "best_metric": 1.8941270112991333, + "best_model_checkpoint": "ckpt/llama2_13b_other/topical_chat_no_sys/checkpoint-2100", + "epoch": 1.13314447592068, + "eval_steps": 100, + "global_step": 2400, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.004721435316336166, + "grad_norm": 0.80355304479599, + "learning_rate": 2.5e-05, + "loss": 2.8563, + "step": 10 + }, + { + "epoch": 0.009442870632672332, + "grad_norm": 0.9893380403518677, + "learning_rate": 5e-05, + "loss": 2.6853, + "step": 20 + }, + { + "epoch": 0.014164305949008499, + "grad_norm": 0.6218120455741882, + "learning_rate": 4.999988957701981e-05, + "loss": 2.1764, + "step": 30 + }, + { + "epoch": 0.018885741265344664, + "grad_norm": 0.7985787987709045, + "learning_rate": 4.99995583090547e-05, + "loss": 2.15, + "step": 40 + }, + { + "epoch": 0.023607176581680833, + "grad_norm": 0.6444200873374939, + "learning_rate": 4.999900619903104e-05, + "loss": 2.1033, + "step": 50 + }, + { + "epoch": 0.028328611898016998, + "grad_norm": 0.7646850943565369, + "learning_rate": 4.999823325182607e-05, + "loss": 2.1417, + "step": 60 + }, + { + "epoch": 0.033050047214353166, + "grad_norm": 1.0737015008926392, + "learning_rate": 4.9997239474267886e-05, + "loss": 2.1031, + "step": 70 + }, + { + "epoch": 0.03777148253068933, + "grad_norm": 0.8393265604972839, + "learning_rate": 4.9996024875135365e-05, + "loss": 1.9967, + "step": 80 + }, + { + "epoch": 0.042492917847025496, + "grad_norm": 0.9493579864501953, + "learning_rate": 4.999458946515808e-05, + "loss": 2.1262, + "step": 90 + }, + { + "epoch": 0.047214353163361665, + "grad_norm": 1.1938248872756958, + "learning_rate": 4.9992933257016194e-05, + "loss": 2.1904, + "step": 100 + }, + { + "epoch": 0.047214353163361665, + "eval_loss": 2.113694906234741, + "eval_runtime": 40.9172, + "eval_samples_per_second": 21.8, + "eval_steps_per_second": 2.737, + "step": 100 + }, + { + "epoch": 0.05193578847969783, + "grad_norm": 1.044633388519287, + "learning_rate": 4.99910562653404e-05, + "loss": 2.062, + "step": 110 + }, + { + "epoch": 0.056657223796033995, + "grad_norm": 0.8298002481460571, + "learning_rate": 4.9988958506711735e-05, + "loss": 1.9974, + "step": 120 + }, + { + "epoch": 0.061378659112370164, + "grad_norm": 0.9604804515838623, + "learning_rate": 4.9986639999661454e-05, + "loss": 2.124, + "step": 130 + }, + { + "epoch": 0.06610009442870633, + "grad_norm": 1.0080113410949707, + "learning_rate": 4.998410076467088e-05, + "loss": 1.9903, + "step": 140 + }, + { + "epoch": 0.0708215297450425, + "grad_norm": 1.0866179466247559, + "learning_rate": 4.9981340824171194e-05, + "loss": 2.1054, + "step": 150 + }, + { + "epoch": 0.07554296506137866, + "grad_norm": 0.8573047518730164, + "learning_rate": 4.997836020254328e-05, + "loss": 2.0274, + "step": 160 + }, + { + "epoch": 0.08026440037771483, + "grad_norm": 1.019667625427246, + "learning_rate": 4.997515892611746e-05, + "loss": 2.1484, + "step": 170 + }, + { + "epoch": 0.08498583569405099, + "grad_norm": 0.9652569890022278, + "learning_rate": 4.9971737023173286e-05, + "loss": 1.9468, + "step": 180 + }, + { + "epoch": 0.08970727101038715, + "grad_norm": 1.3909372091293335, + "learning_rate": 4.996809452393931e-05, + "loss": 2.0637, + "step": 190 + }, + { + "epoch": 0.09442870632672333, + "grad_norm": 0.7734001874923706, + "learning_rate": 4.996423146059277e-05, + "loss": 1.9627, + "step": 200 + }, + { + "epoch": 0.09442870632672333, + "eval_loss": 2.058908462524414, + "eval_runtime": 41.0852, + "eval_samples_per_second": 21.711, + "eval_steps_per_second": 2.726, + "step": 200 + }, + { + "epoch": 0.09915014164305949, + "grad_norm": 0.8814783096313477, + "learning_rate": 4.996014786725935e-05, + "loss": 1.9765, + "step": 210 + }, + { + "epoch": 0.10387157695939565, + "grad_norm": 1.1643282175064087, + "learning_rate": 4.9955843780012846e-05, + "loss": 2.0651, + "step": 220 + }, + { + "epoch": 0.10859301227573183, + "grad_norm": 1.2584123611450195, + "learning_rate": 4.995131923687488e-05, + "loss": 1.9729, + "step": 230 + }, + { + "epoch": 0.11331444759206799, + "grad_norm": 1.1681088209152222, + "learning_rate": 4.994657427781453e-05, + "loss": 2.0653, + "step": 240 + }, + { + "epoch": 0.11803588290840415, + "grad_norm": 1.009667158126831, + "learning_rate": 4.994160894474799e-05, + "loss": 2.185, + "step": 250 + }, + { + "epoch": 0.12275731822474033, + "grad_norm": 1.0578892230987549, + "learning_rate": 4.993642328153822e-05, + "loss": 2.0668, + "step": 260 + }, + { + "epoch": 0.1274787535410765, + "grad_norm": 1.0033955574035645, + "learning_rate": 4.993101733399453e-05, + "loss": 2.0643, + "step": 270 + }, + { + "epoch": 0.13220018885741266, + "grad_norm": 1.250969648361206, + "learning_rate": 4.9925391149872184e-05, + "loss": 2.0303, + "step": 280 + }, + { + "epoch": 0.1369216241737488, + "grad_norm": 1.1165454387664795, + "learning_rate": 4.9919544778871985e-05, + "loss": 1.9509, + "step": 290 + }, + { + "epoch": 0.141643059490085, + "grad_norm": 0.8889014720916748, + "learning_rate": 4.991347827263982e-05, + "loss": 2.0172, + "step": 300 + }, + { + "epoch": 0.141643059490085, + "eval_loss": 2.0220935344696045, + "eval_runtime": 41.0678, + "eval_samples_per_second": 21.72, + "eval_steps_per_second": 2.727, + "step": 300 + }, + { + "epoch": 0.14636449480642116, + "grad_norm": 1.275136947631836, + "learning_rate": 4.990719168476625e-05, + "loss": 1.9862, + "step": 310 + }, + { + "epoch": 0.1510859301227573, + "grad_norm": 0.959540069103241, + "learning_rate": 4.990068507078595e-05, + "loss": 1.9686, + "step": 320 + }, + { + "epoch": 0.1558073654390935, + "grad_norm": 0.8156929612159729, + "learning_rate": 4.989395848817731e-05, + "loss": 1.9751, + "step": 330 + }, + { + "epoch": 0.16052880075542966, + "grad_norm": 1.4008054733276367, + "learning_rate": 4.988701199636186e-05, + "loss": 1.9844, + "step": 340 + }, + { + "epoch": 0.1652502360717658, + "grad_norm": 1.4623981714248657, + "learning_rate": 4.987984565670382e-05, + "loss": 2.1042, + "step": 350 + }, + { + "epoch": 0.16997167138810199, + "grad_norm": 1.0944546461105347, + "learning_rate": 4.9872459532509436e-05, + "loss": 1.9839, + "step": 360 + }, + { + "epoch": 0.17469310670443816, + "grad_norm": 1.0253016948699951, + "learning_rate": 4.9864853689026556e-05, + "loss": 1.9492, + "step": 370 + }, + { + "epoch": 0.1794145420207743, + "grad_norm": 1.1482458114624023, + "learning_rate": 4.985702819344397e-05, + "loss": 1.9942, + "step": 380 + }, + { + "epoch": 0.18413597733711048, + "grad_norm": 1.094506859779358, + "learning_rate": 4.984898311489085e-05, + "loss": 1.9091, + "step": 390 + }, + { + "epoch": 0.18885741265344666, + "grad_norm": 1.3459991216659546, + "learning_rate": 4.98407185244361e-05, + "loss": 1.8965, + "step": 400 + }, + { + "epoch": 0.18885741265344666, + "eval_loss": 1.9968496561050415, + "eval_runtime": 41.0802, + "eval_samples_per_second": 21.714, + "eval_steps_per_second": 2.726, + "step": 400 + }, + { + "epoch": 0.1935788479697828, + "grad_norm": 1.5208889245986938, + "learning_rate": 4.983223449508779e-05, + "loss": 1.8476, + "step": 410 + }, + { + "epoch": 0.19830028328611898, + "grad_norm": 1.0982236862182617, + "learning_rate": 4.982353110179246e-05, + "loss": 1.9005, + "step": 420 + }, + { + "epoch": 0.20302171860245516, + "grad_norm": 1.247300624847412, + "learning_rate": 4.9814608421434485e-05, + "loss": 1.9098, + "step": 430 + }, + { + "epoch": 0.2077431539187913, + "grad_norm": 0.9733744859695435, + "learning_rate": 4.9805466532835376e-05, + "loss": 1.912, + "step": 440 + }, + { + "epoch": 0.21246458923512748, + "grad_norm": 1.2082431316375732, + "learning_rate": 4.979610551675311e-05, + "loss": 2.044, + "step": 450 + }, + { + "epoch": 0.21718602455146366, + "grad_norm": 0.9695537090301514, + "learning_rate": 4.978652545588137e-05, + "loss": 2.0439, + "step": 460 + }, + { + "epoch": 0.2219074598677998, + "grad_norm": 1.1563127040863037, + "learning_rate": 4.977672643484889e-05, + "loss": 2.017, + "step": 470 + }, + { + "epoch": 0.22662889518413598, + "grad_norm": 1.26643705368042, + "learning_rate": 4.976670854021862e-05, + "loss": 1.9652, + "step": 480 + }, + { + "epoch": 0.23135033050047216, + "grad_norm": 1.539183259010315, + "learning_rate": 4.975647186048703e-05, + "loss": 1.9621, + "step": 490 + }, + { + "epoch": 0.2360717658168083, + "grad_norm": 1.4867112636566162, + "learning_rate": 4.97460164860833e-05, + "loss": 1.9534, + "step": 500 + }, + { + "epoch": 0.2360717658168083, + "eval_loss": 1.982257604598999, + "eval_runtime": 41.1055, + "eval_samples_per_second": 21.7, + "eval_steps_per_second": 2.725, + "step": 500 + }, + { + "epoch": 0.24079320113314448, + "grad_norm": 1.3039374351501465, + "learning_rate": 4.973534250936851e-05, + "loss": 1.9625, + "step": 510 + }, + { + "epoch": 0.24551463644948066, + "grad_norm": 1.215240478515625, + "learning_rate": 4.972445002463485e-05, + "loss": 1.9728, + "step": 520 + }, + { + "epoch": 0.2502360717658168, + "grad_norm": 1.4657334089279175, + "learning_rate": 4.971333912810476e-05, + "loss": 1.9505, + "step": 530 + }, + { + "epoch": 0.254957507082153, + "grad_norm": 1.0312436819076538, + "learning_rate": 4.970200991793012e-05, + "loss": 1.9735, + "step": 540 + }, + { + "epoch": 0.25967894239848915, + "grad_norm": 1.0905646085739136, + "learning_rate": 4.969046249419133e-05, + "loss": 1.9265, + "step": 550 + }, + { + "epoch": 0.26440037771482533, + "grad_norm": 1.271078109741211, + "learning_rate": 4.967869695889647e-05, + "loss": 1.9292, + "step": 560 + }, + { + "epoch": 0.26912181303116145, + "grad_norm": 1.5857294797897339, + "learning_rate": 4.966671341598037e-05, + "loss": 1.9518, + "step": 570 + }, + { + "epoch": 0.2738432483474976, + "grad_norm": 1.201617956161499, + "learning_rate": 4.965451197130373e-05, + "loss": 1.9796, + "step": 580 + }, + { + "epoch": 0.2785646836638338, + "grad_norm": 1.4783813953399658, + "learning_rate": 4.964209273265212e-05, + "loss": 1.9494, + "step": 590 + }, + { + "epoch": 0.28328611898017, + "grad_norm": 1.237426996231079, + "learning_rate": 4.9629455809735105e-05, + "loss": 1.8621, + "step": 600 + }, + { + "epoch": 0.28328611898017, + "eval_loss": 1.9678794145584106, + "eval_runtime": 41.1041, + "eval_samples_per_second": 21.701, + "eval_steps_per_second": 2.725, + "step": 600 + }, + { + "epoch": 0.28800755429650615, + "grad_norm": 1.0407463312149048, + "learning_rate": 4.9616601314185206e-05, + "loss": 1.8698, + "step": 610 + }, + { + "epoch": 0.2927289896128423, + "grad_norm": 1.175148844718933, + "learning_rate": 4.9603529359556975e-05, + "loss": 1.9089, + "step": 620 + }, + { + "epoch": 0.29745042492917845, + "grad_norm": 1.4049919843673706, + "learning_rate": 4.959024006132593e-05, + "loss": 2.026, + "step": 630 + }, + { + "epoch": 0.3021718602455146, + "grad_norm": 1.3838235139846802, + "learning_rate": 4.95767335368876e-05, + "loss": 1.9059, + "step": 640 + }, + { + "epoch": 0.3068932955618508, + "grad_norm": 1.1748583316802979, + "learning_rate": 4.956300990555643e-05, + "loss": 1.973, + "step": 650 + }, + { + "epoch": 0.311614730878187, + "grad_norm": 1.4677692651748657, + "learning_rate": 4.954906928856476e-05, + "loss": 1.9296, + "step": 660 + }, + { + "epoch": 0.31633616619452315, + "grad_norm": 1.3763841390609741, + "learning_rate": 4.953491180906175e-05, + "loss": 1.9321, + "step": 670 + }, + { + "epoch": 0.3210576015108593, + "grad_norm": 1.053803563117981, + "learning_rate": 4.952053759211229e-05, + "loss": 2.0338, + "step": 680 + }, + { + "epoch": 0.32577903682719545, + "grad_norm": 1.45322585105896, + "learning_rate": 4.9505946764695885e-05, + "loss": 1.9328, + "step": 690 + }, + { + "epoch": 0.3305004721435316, + "grad_norm": 1.3070508241653442, + "learning_rate": 4.949113945570555e-05, + "loss": 1.9777, + "step": 700 + }, + { + "epoch": 0.3305004721435316, + "eval_loss": 1.9610685110092163, + "eval_runtime": 41.0861, + "eval_samples_per_second": 21.71, + "eval_steps_per_second": 2.726, + "step": 700 + }, + { + "epoch": 0.3352219074598678, + "grad_norm": 1.031301498413086, + "learning_rate": 4.947611579594666e-05, + "loss": 2.0113, + "step": 710 + }, + { + "epoch": 0.33994334277620397, + "grad_norm": 1.3410820960998535, + "learning_rate": 4.9460875918135804e-05, + "loss": 1.9097, + "step": 720 + }, + { + "epoch": 0.34466477809254015, + "grad_norm": 1.1345252990722656, + "learning_rate": 4.944541995689958e-05, + "loss": 1.9226, + "step": 730 + }, + { + "epoch": 0.3493862134088763, + "grad_norm": 1.2572389841079712, + "learning_rate": 4.9429748048773475e-05, + "loss": 1.9583, + "step": 740 + }, + { + "epoch": 0.35410764872521244, + "grad_norm": 1.2059199810028076, + "learning_rate": 4.941386033220058e-05, + "loss": 1.9101, + "step": 750 + }, + { + "epoch": 0.3588290840415486, + "grad_norm": 1.2732455730438232, + "learning_rate": 4.9397756947530414e-05, + "loss": 1.9238, + "step": 760 + }, + { + "epoch": 0.3635505193578848, + "grad_norm": 1.2681666612625122, + "learning_rate": 4.938143803701769e-05, + "loss": 1.867, + "step": 770 + }, + { + "epoch": 0.36827195467422097, + "grad_norm": 1.1062850952148438, + "learning_rate": 4.9364903744821014e-05, + "loss": 1.9233, + "step": 780 + }, + { + "epoch": 0.37299338999055714, + "grad_norm": 1.5268208980560303, + "learning_rate": 4.934815421700165e-05, + "loss": 1.8599, + "step": 790 + }, + { + "epoch": 0.3777148253068933, + "grad_norm": 1.4348393678665161, + "learning_rate": 4.933118960152222e-05, + "loss": 2.0865, + "step": 800 + }, + { + "epoch": 0.3777148253068933, + "eval_loss": 1.954448938369751, + "eval_runtime": 41.0954, + "eval_samples_per_second": 21.706, + "eval_steps_per_second": 2.725, + "step": 800 + }, + { + "epoch": 0.38243626062322944, + "grad_norm": 1.1293368339538574, + "learning_rate": 4.931401004824541e-05, + "loss": 1.9036, + "step": 810 + }, + { + "epoch": 0.3871576959395656, + "grad_norm": 1.2953282594680786, + "learning_rate": 4.92966157089326e-05, + "loss": 1.92, + "step": 820 + }, + { + "epoch": 0.3918791312559018, + "grad_norm": 1.5220308303833008, + "learning_rate": 4.927900673724259e-05, + "loss": 1.9894, + "step": 830 + }, + { + "epoch": 0.39660056657223797, + "grad_norm": 1.3064740896224976, + "learning_rate": 4.9261183288730176e-05, + "loss": 1.9978, + "step": 840 + }, + { + "epoch": 0.40132200188857414, + "grad_norm": 1.2408039569854736, + "learning_rate": 4.9243145520844834e-05, + "loss": 1.8262, + "step": 850 + }, + { + "epoch": 0.4060434372049103, + "grad_norm": 1.314778208732605, + "learning_rate": 4.9224893592929275e-05, + "loss": 1.907, + "step": 860 + }, + { + "epoch": 0.41076487252124644, + "grad_norm": 1.2585623264312744, + "learning_rate": 4.92064276662181e-05, + "loss": 1.9766, + "step": 870 + }, + { + "epoch": 0.4154863078375826, + "grad_norm": 1.2925095558166504, + "learning_rate": 4.9187747903836303e-05, + "loss": 1.9038, + "step": 880 + }, + { + "epoch": 0.4202077431539188, + "grad_norm": 1.3546730279922485, + "learning_rate": 4.9168854470797904e-05, + "loss": 1.9733, + "step": 890 + }, + { + "epoch": 0.42492917847025496, + "grad_norm": 1.3911082744598389, + "learning_rate": 4.914974753400443e-05, + "loss": 1.9662, + "step": 900 + }, + { + "epoch": 0.42492917847025496, + "eval_loss": 1.946061372756958, + "eval_runtime": 41.0731, + "eval_samples_per_second": 21.717, + "eval_steps_per_second": 2.727, + "step": 900 + }, + { + "epoch": 0.42965061378659114, + "grad_norm": 1.3016057014465332, + "learning_rate": 4.913042726224347e-05, + "loss": 2.0209, + "step": 910 + }, + { + "epoch": 0.4343720491029273, + "grad_norm": 1.200618863105774, + "learning_rate": 4.911089382618718e-05, + "loss": 1.8343, + "step": 920 + }, + { + "epoch": 0.43909348441926344, + "grad_norm": 1.2423232793807983, + "learning_rate": 4.909114739839079e-05, + "loss": 1.952, + "step": 930 + }, + { + "epoch": 0.4438149197355996, + "grad_norm": 1.1997716426849365, + "learning_rate": 4.907118815329104e-05, + "loss": 1.9064, + "step": 940 + }, + { + "epoch": 0.4485363550519358, + "grad_norm": 1.3008543252944946, + "learning_rate": 4.905101626720469e-05, + "loss": 1.883, + "step": 950 + }, + { + "epoch": 0.45325779036827196, + "grad_norm": 1.4466650485992432, + "learning_rate": 4.903063191832691e-05, + "loss": 1.8716, + "step": 960 + }, + { + "epoch": 0.45797922568460814, + "grad_norm": 1.3495376110076904, + "learning_rate": 4.901003528672975e-05, + "loss": 1.9534, + "step": 970 + }, + { + "epoch": 0.4627006610009443, + "grad_norm": 1.4013429880142212, + "learning_rate": 4.898922655436052e-05, + "loss": 1.8552, + "step": 980 + }, + { + "epoch": 0.46742209631728043, + "grad_norm": 1.716334581375122, + "learning_rate": 4.8968205905040207e-05, + "loss": 1.974, + "step": 990 + }, + { + "epoch": 0.4721435316336166, + "grad_norm": 1.435140609741211, + "learning_rate": 4.894697352446182e-05, + "loss": 1.8352, + "step": 1000 + }, + { + "epoch": 0.4721435316336166, + "eval_loss": 1.9375569820404053, + "eval_runtime": 41.1017, + "eval_samples_per_second": 21.702, + "eval_steps_per_second": 2.725, + "step": 1000 + }, + { + "epoch": 0.4768649669499528, + "grad_norm": 1.7291275262832642, + "learning_rate": 4.8925529600188794e-05, + "loss": 1.9139, + "step": 1010 + }, + { + "epoch": 0.48158640226628896, + "grad_norm": 1.2678258419036865, + "learning_rate": 4.8903874321653274e-05, + "loss": 1.8118, + "step": 1020 + }, + { + "epoch": 0.48630783758262514, + "grad_norm": 1.5206748247146606, + "learning_rate": 4.88820078801545e-05, + "loss": 1.9924, + "step": 1030 + }, + { + "epoch": 0.4910292728989613, + "grad_norm": 1.4560861587524414, + "learning_rate": 4.885993046885708e-05, + "loss": 1.8814, + "step": 1040 + }, + { + "epoch": 0.49575070821529743, + "grad_norm": 1.150933861732483, + "learning_rate": 4.883764228278931e-05, + "loss": 1.8928, + "step": 1050 + }, + { + "epoch": 0.5004721435316336, + "grad_norm": 1.4958614110946655, + "learning_rate": 4.881514351884141e-05, + "loss": 1.9071, + "step": 1060 + }, + { + "epoch": 0.5051935788479698, + "grad_norm": 1.296898603439331, + "learning_rate": 4.879243437576383e-05, + "loss": 1.939, + "step": 1070 + }, + { + "epoch": 0.509915014164306, + "grad_norm": 1.6953368186950684, + "learning_rate": 4.876951505416547e-05, + "loss": 1.8603, + "step": 1080 + }, + { + "epoch": 0.5146364494806421, + "grad_norm": 1.2347371578216553, + "learning_rate": 4.8746385756511915e-05, + "loss": 1.86, + "step": 1090 + }, + { + "epoch": 0.5193578847969783, + "grad_norm": 2.050234794616699, + "learning_rate": 4.872304668712364e-05, + "loss": 1.8973, + "step": 1100 + }, + { + "epoch": 0.5193578847969783, + "eval_loss": 1.932855248451233, + "eval_runtime": 41.1127, + "eval_samples_per_second": 21.696, + "eval_steps_per_second": 2.724, + "step": 1100 + }, + { + "epoch": 0.5240793201133145, + "grad_norm": 1.5210719108581543, + "learning_rate": 4.8699498052174205e-05, + "loss": 1.9125, + "step": 1110 + }, + { + "epoch": 0.5288007554296507, + "grad_norm": 1.17880380153656, + "learning_rate": 4.867574005968847e-05, + "loss": 2.001, + "step": 1120 + }, + { + "epoch": 0.5335221907459868, + "grad_norm": 1.2916531562805176, + "learning_rate": 4.8651772919540686e-05, + "loss": 1.8442, + "step": 1130 + }, + { + "epoch": 0.5382436260623229, + "grad_norm": 1.2535613775253296, + "learning_rate": 4.862759684345269e-05, + "loss": 1.9932, + "step": 1140 + }, + { + "epoch": 0.5429650613786591, + "grad_norm": 1.2579909563064575, + "learning_rate": 4.860321204499205e-05, + "loss": 1.8388, + "step": 1150 + }, + { + "epoch": 0.5476864966949953, + "grad_norm": 1.207390546798706, + "learning_rate": 4.857861873957011e-05, + "loss": 1.7975, + "step": 1160 + }, + { + "epoch": 0.5524079320113314, + "grad_norm": 1.574354648590088, + "learning_rate": 4.855381714444018e-05, + "loss": 1.848, + "step": 1170 + }, + { + "epoch": 0.5571293673276676, + "grad_norm": 1.6396162509918213, + "learning_rate": 4.8528807478695535e-05, + "loss": 1.9311, + "step": 1180 + }, + { + "epoch": 0.5618508026440038, + "grad_norm": 1.4158663749694824, + "learning_rate": 4.850358996326753e-05, + "loss": 1.8705, + "step": 1190 + }, + { + "epoch": 0.56657223796034, + "grad_norm": 1.4436795711517334, + "learning_rate": 4.84781648209236e-05, + "loss": 1.9688, + "step": 1200 + }, + { + "epoch": 0.56657223796034, + "eval_loss": 1.9264005422592163, + "eval_runtime": 41.1199, + "eval_samples_per_second": 21.693, + "eval_steps_per_second": 2.724, + "step": 1200 + }, + { + "epoch": 0.5712936732766761, + "grad_norm": 1.747074007987976, + "learning_rate": 4.8452532276265364e-05, + "loss": 1.9038, + "step": 1210 + }, + { + "epoch": 0.5760151085930123, + "grad_norm": 1.5841553211212158, + "learning_rate": 4.842669255572656e-05, + "loss": 1.9581, + "step": 1220 + }, + { + "epoch": 0.5807365439093485, + "grad_norm": 1.4784042835235596, + "learning_rate": 4.8400645887571126e-05, + "loss": 1.8913, + "step": 1230 + }, + { + "epoch": 0.5854579792256847, + "grad_norm": 1.332627296447754, + "learning_rate": 4.837439250189111e-05, + "loss": 1.8597, + "step": 1240 + }, + { + "epoch": 0.5901794145420207, + "grad_norm": 1.0213313102722168, + "learning_rate": 4.834793263060468e-05, + "loss": 1.9072, + "step": 1250 + }, + { + "epoch": 0.5949008498583569, + "grad_norm": 1.4618474245071411, + "learning_rate": 4.832126650745405e-05, + "loss": 1.9781, + "step": 1260 + }, + { + "epoch": 0.5996222851746931, + "grad_norm": 1.6018431186676025, + "learning_rate": 4.829439436800346e-05, + "loss": 1.885, + "step": 1270 + }, + { + "epoch": 0.6043437204910292, + "grad_norm": 1.3701698780059814, + "learning_rate": 4.8267316449637054e-05, + "loss": 1.8891, + "step": 1280 + }, + { + "epoch": 0.6090651558073654, + "grad_norm": 1.6125386953353882, + "learning_rate": 4.8240032991556765e-05, + "loss": 1.8654, + "step": 1290 + }, + { + "epoch": 0.6137865911237016, + "grad_norm": 1.3987213373184204, + "learning_rate": 4.821254423478027e-05, + "loss": 1.8383, + "step": 1300 + }, + { + "epoch": 0.6137865911237016, + "eval_loss": 1.9191977977752686, + "eval_runtime": 41.0939, + "eval_samples_per_second": 21.706, + "eval_steps_per_second": 2.725, + "step": 1300 + }, + { + "epoch": 0.6185080264400378, + "grad_norm": 1.4958666563034058, + "learning_rate": 4.8184850422138795e-05, + "loss": 1.8538, + "step": 1310 + }, + { + "epoch": 0.623229461756374, + "grad_norm": 1.6139039993286133, + "learning_rate": 4.815695179827502e-05, + "loss": 1.9403, + "step": 1320 + }, + { + "epoch": 0.6279508970727101, + "grad_norm": 1.429937481880188, + "learning_rate": 4.812884860964086e-05, + "loss": 1.8925, + "step": 1330 + }, + { + "epoch": 0.6326723323890463, + "grad_norm": 1.4869327545166016, + "learning_rate": 4.8100541104495355e-05, + "loss": 1.8849, + "step": 1340 + }, + { + "epoch": 0.6373937677053825, + "grad_norm": 1.32284677028656, + "learning_rate": 4.8072029532902426e-05, + "loss": 1.8733, + "step": 1350 + }, + { + "epoch": 0.6421152030217187, + "grad_norm": 1.659633994102478, + "learning_rate": 4.8043314146728705e-05, + "loss": 1.9357, + "step": 1360 + }, + { + "epoch": 0.6468366383380547, + "grad_norm": 1.524124264717102, + "learning_rate": 4.8014395199641246e-05, + "loss": 1.7913, + "step": 1370 + }, + { + "epoch": 0.6515580736543909, + "grad_norm": 1.294195294380188, + "learning_rate": 4.798527294710538e-05, + "loss": 1.9151, + "step": 1380 + }, + { + "epoch": 0.6562795089707271, + "grad_norm": 1.8346223831176758, + "learning_rate": 4.795594764638237e-05, + "loss": 1.9297, + "step": 1390 + }, + { + "epoch": 0.6610009442870632, + "grad_norm": 1.457963466644287, + "learning_rate": 4.792641955652718e-05, + "loss": 1.9032, + "step": 1400 + }, + { + "epoch": 0.6610009442870632, + "eval_loss": 1.914588212966919, + "eval_runtime": 41.1099, + "eval_samples_per_second": 21.698, + "eval_steps_per_second": 2.724, + "step": 1400 + }, + { + "epoch": 0.6657223796033994, + "grad_norm": 1.5672495365142822, + "learning_rate": 4.7896688938386195e-05, + "loss": 1.9032, + "step": 1410 + }, + { + "epoch": 0.6704438149197356, + "grad_norm": 1.5081193447113037, + "learning_rate": 4.786675605459487e-05, + "loss": 1.8854, + "step": 1420 + }, + { + "epoch": 0.6751652502360718, + "grad_norm": 1.450073003768921, + "learning_rate": 4.7836621169575494e-05, + "loss": 1.8865, + "step": 1430 + }, + { + "epoch": 0.6798866855524079, + "grad_norm": 1.4455468654632568, + "learning_rate": 4.7806284549534755e-05, + "loss": 1.7515, + "step": 1440 + }, + { + "epoch": 0.6846081208687441, + "grad_norm": 1.7873055934906006, + "learning_rate": 4.7775746462461446e-05, + "loss": 1.9624, + "step": 1450 + }, + { + "epoch": 0.6893295561850803, + "grad_norm": 1.5174776315689087, + "learning_rate": 4.7745007178124114e-05, + "loss": 1.7875, + "step": 1460 + }, + { + "epoch": 0.6940509915014165, + "grad_norm": 1.6502797603607178, + "learning_rate": 4.771406696806861e-05, + "loss": 1.8984, + "step": 1470 + }, + { + "epoch": 0.6987724268177526, + "grad_norm": 1.3501724004745483, + "learning_rate": 4.7682926105615754e-05, + "loss": 1.8594, + "step": 1480 + }, + { + "epoch": 0.7034938621340887, + "grad_norm": 1.442497730255127, + "learning_rate": 4.76515848658589e-05, + "loss": 1.8484, + "step": 1490 + }, + { + "epoch": 0.7082152974504249, + "grad_norm": 1.346633791923523, + "learning_rate": 4.76200435256615e-05, + "loss": 1.9295, + "step": 1500 + }, + { + "epoch": 0.7082152974504249, + "eval_loss": 1.9108749628067017, + "eval_runtime": 41.131, + "eval_samples_per_second": 21.687, + "eval_steps_per_second": 2.723, + "step": 1500 + }, + { + "epoch": 0.7129367327667611, + "grad_norm": 1.684790849685669, + "learning_rate": 4.758830236365465e-05, + "loss": 1.8586, + "step": 1510 + }, + { + "epoch": 0.7176581680830972, + "grad_norm": 1.508821725845337, + "learning_rate": 4.7556361660234634e-05, + "loss": 1.9794, + "step": 1520 + }, + { + "epoch": 0.7223796033994334, + "grad_norm": 1.526046872138977, + "learning_rate": 4.752422169756048e-05, + "loss": 1.9122, + "step": 1530 + }, + { + "epoch": 0.7271010387157696, + "grad_norm": 2.1538867950439453, + "learning_rate": 4.749188275955143e-05, + "loss": 1.9197, + "step": 1540 + }, + { + "epoch": 0.7318224740321058, + "grad_norm": 1.4204916954040527, + "learning_rate": 4.745934513188442e-05, + "loss": 1.8548, + "step": 1550 + }, + { + "epoch": 0.7365439093484419, + "grad_norm": 1.319259524345398, + "learning_rate": 4.7426609101991605e-05, + "loss": 1.8857, + "step": 1560 + }, + { + "epoch": 0.7412653446647781, + "grad_norm": 1.351597785949707, + "learning_rate": 4.739367495905778e-05, + "loss": 1.876, + "step": 1570 + }, + { + "epoch": 0.7459867799811143, + "grad_norm": 1.51447331905365, + "learning_rate": 4.736054299401785e-05, + "loss": 1.9355, + "step": 1580 + }, + { + "epoch": 0.7507082152974505, + "grad_norm": 1.2614985704421997, + "learning_rate": 4.7327213499554234e-05, + "loss": 1.9286, + "step": 1590 + }, + { + "epoch": 0.7554296506137866, + "grad_norm": 1.0692663192749023, + "learning_rate": 4.7293686770094294e-05, + "loss": 1.8207, + "step": 1600 + }, + { + "epoch": 0.7554296506137866, + "eval_loss": 1.906082034111023, + "eval_runtime": 41.0972, + "eval_samples_per_second": 21.705, + "eval_steps_per_second": 2.725, + "step": 1600 + }, + { + "epoch": 0.7601510859301227, + "grad_norm": 1.4698175191879272, + "learning_rate": 4.725996310180776e-05, + "loss": 1.9245, + "step": 1610 + }, + { + "epoch": 0.7648725212464589, + "grad_norm": 1.2762446403503418, + "learning_rate": 4.7226042792604046e-05, + "loss": 1.8556, + "step": 1620 + }, + { + "epoch": 0.7695939565627951, + "grad_norm": 1.7315044403076172, + "learning_rate": 4.719192614212969e-05, + "loss": 1.9757, + "step": 1630 + }, + { + "epoch": 0.7743153918791312, + "grad_norm": 1.228724479675293, + "learning_rate": 4.7157613451765686e-05, + "loss": 2.0371, + "step": 1640 + }, + { + "epoch": 0.7790368271954674, + "grad_norm": 1.7622945308685303, + "learning_rate": 4.7123105024624776e-05, + "loss": 1.8646, + "step": 1650 + }, + { + "epoch": 0.7837582625118036, + "grad_norm": 1.4164314270019531, + "learning_rate": 4.708840116554883e-05, + "loss": 1.8383, + "step": 1660 + }, + { + "epoch": 0.7884796978281398, + "grad_norm": 1.8074839115142822, + "learning_rate": 4.7053502181106145e-05, + "loss": 1.9008, + "step": 1670 + }, + { + "epoch": 0.7932011331444759, + "grad_norm": 1.5293446779251099, + "learning_rate": 4.70184083795887e-05, + "loss": 1.8378, + "step": 1680 + }, + { + "epoch": 0.7979225684608121, + "grad_norm": 1.3560067415237427, + "learning_rate": 4.698312007100947e-05, + "loss": 1.8468, + "step": 1690 + }, + { + "epoch": 0.8026440037771483, + "grad_norm": 1.6982842683792114, + "learning_rate": 4.694763756709967e-05, + "loss": 1.9119, + "step": 1700 + }, + { + "epoch": 0.8026440037771483, + "eval_loss": 1.9032281637191772, + "eval_runtime": 41.2928, + "eval_samples_per_second": 21.602, + "eval_steps_per_second": 2.712, + "step": 1700 + }, + { + "epoch": 0.8073654390934845, + "grad_norm": 1.5927674770355225, + "learning_rate": 4.691196118130601e-05, + "loss": 1.9081, + "step": 1710 + }, + { + "epoch": 0.8120868744098206, + "grad_norm": 1.4806030988693237, + "learning_rate": 4.687609122878791e-05, + "loss": 1.8604, + "step": 1720 + }, + { + "epoch": 0.8168083097261567, + "grad_norm": 1.5093107223510742, + "learning_rate": 4.6840028026414745e-05, + "loss": 1.7843, + "step": 1730 + }, + { + "epoch": 0.8215297450424929, + "grad_norm": 1.2747288942337036, + "learning_rate": 4.6803771892763004e-05, + "loss": 1.8666, + "step": 1740 + }, + { + "epoch": 0.826251180358829, + "grad_norm": 1.614396333694458, + "learning_rate": 4.676732314811353e-05, + "loss": 1.8538, + "step": 1750 + }, + { + "epoch": 0.8309726156751652, + "grad_norm": 1.3834142684936523, + "learning_rate": 4.673068211444862e-05, + "loss": 1.8492, + "step": 1760 + }, + { + "epoch": 0.8356940509915014, + "grad_norm": 1.6884135007858276, + "learning_rate": 4.669384911544927e-05, + "loss": 1.8554, + "step": 1770 + }, + { + "epoch": 0.8404154863078376, + "grad_norm": 1.3983336687088013, + "learning_rate": 4.665682447649222e-05, + "loss": 1.9333, + "step": 1780 + }, + { + "epoch": 0.8451369216241738, + "grad_norm": 1.45684015750885, + "learning_rate": 4.661960852464717e-05, + "loss": 1.7886, + "step": 1790 + }, + { + "epoch": 0.8498583569405099, + "grad_norm": 1.4929298162460327, + "learning_rate": 4.6582201588673816e-05, + "loss": 1.8392, + "step": 1800 + }, + { + "epoch": 0.8498583569405099, + "eval_loss": 1.9019125699996948, + "eval_runtime": 41.2022, + "eval_samples_per_second": 21.649, + "eval_steps_per_second": 2.718, + "step": 1800 + }, + { + "epoch": 0.8545797922568461, + "grad_norm": 1.9542425870895386, + "learning_rate": 4.6544603999018966e-05, + "loss": 1.8178, + "step": 1810 + }, + { + "epoch": 0.8593012275731823, + "grad_norm": 1.7367998361587524, + "learning_rate": 4.6506816087813685e-05, + "loss": 1.9773, + "step": 1820 + }, + { + "epoch": 0.8640226628895185, + "grad_norm": 1.410551905632019, + "learning_rate": 4.646883818887025e-05, + "loss": 1.7574, + "step": 1830 + }, + { + "epoch": 0.8687440982058546, + "grad_norm": 1.2788314819335938, + "learning_rate": 4.6430670637679295e-05, + "loss": 1.9249, + "step": 1840 + }, + { + "epoch": 0.8734655335221907, + "grad_norm": 1.1406760215759277, + "learning_rate": 4.63923137714068e-05, + "loss": 1.8666, + "step": 1850 + }, + { + "epoch": 0.8781869688385269, + "grad_norm": 1.0523242950439453, + "learning_rate": 4.635376792889111e-05, + "loss": 1.8948, + "step": 1860 + }, + { + "epoch": 0.882908404154863, + "grad_norm": 1.2536702156066895, + "learning_rate": 4.6315033450639996e-05, + "loss": 1.8183, + "step": 1870 + }, + { + "epoch": 0.8876298394711992, + "grad_norm": 1.2993184328079224, + "learning_rate": 4.6276110678827555e-05, + "loss": 1.8918, + "step": 1880 + }, + { + "epoch": 0.8923512747875354, + "grad_norm": 1.475024700164795, + "learning_rate": 4.6236999957291275e-05, + "loss": 1.8303, + "step": 1890 + }, + { + "epoch": 0.8970727101038716, + "grad_norm": 1.4702178239822388, + "learning_rate": 4.619770163152896e-05, + "loss": 1.961, + "step": 1900 + }, + { + "epoch": 0.8970727101038716, + "eval_loss": 1.8994309902191162, + "eval_runtime": 41.0958, + "eval_samples_per_second": 21.705, + "eval_steps_per_second": 2.725, + "step": 1900 + }, + { + "epoch": 0.9017941454202077, + "grad_norm": 1.4287694692611694, + "learning_rate": 4.615821604869564e-05, + "loss": 1.8132, + "step": 1910 + }, + { + "epoch": 0.9065155807365439, + "grad_norm": 1.98855721950531, + "learning_rate": 4.61185435576006e-05, + "loss": 1.8418, + "step": 1920 + }, + { + "epoch": 0.9112370160528801, + "grad_norm": 1.482932209968567, + "learning_rate": 4.607868450870421e-05, + "loss": 1.8774, + "step": 1930 + }, + { + "epoch": 0.9159584513692163, + "grad_norm": 1.6554712057113647, + "learning_rate": 4.6038639254114855e-05, + "loss": 1.9419, + "step": 1940 + }, + { + "epoch": 0.9206798866855525, + "grad_norm": 1.5708400011062622, + "learning_rate": 4.599840814758587e-05, + "loss": 2.0166, + "step": 1950 + }, + { + "epoch": 0.9254013220018886, + "grad_norm": 1.4725310802459717, + "learning_rate": 4.5957991544512316e-05, + "loss": 1.9425, + "step": 1960 + }, + { + "epoch": 0.9301227573182247, + "grad_norm": 1.569560170173645, + "learning_rate": 4.591738980192796e-05, + "loss": 1.8323, + "step": 1970 + }, + { + "epoch": 0.9348441926345609, + "grad_norm": 1.2551137208938599, + "learning_rate": 4.587660327850203e-05, + "loss": 1.8055, + "step": 1980 + }, + { + "epoch": 0.939565627950897, + "grad_norm": 1.6381155252456665, + "learning_rate": 4.583563233453607e-05, + "loss": 1.7541, + "step": 1990 + }, + { + "epoch": 0.9442870632672332, + "grad_norm": 1.6381361484527588, + "learning_rate": 4.579447733196079e-05, + "loss": 1.8913, + "step": 2000 + }, + { + "epoch": 0.9442870632672332, + "eval_loss": 1.8945337533950806, + "eval_runtime": 41.0799, + "eval_samples_per_second": 21.714, + "eval_steps_per_second": 2.726, + "step": 2000 + }, + { + "epoch": 0.9490084985835694, + "grad_norm": 1.4363571405410767, + "learning_rate": 4.5753138634332835e-05, + "loss": 1.8698, + "step": 2010 + }, + { + "epoch": 0.9537299338999056, + "grad_norm": 1.833264708518982, + "learning_rate": 4.5711616606831576e-05, + "loss": 1.893, + "step": 2020 + }, + { + "epoch": 0.9584513692162417, + "grad_norm": 1.4078890085220337, + "learning_rate": 4.566991161625589e-05, + "loss": 1.965, + "step": 2030 + }, + { + "epoch": 0.9631728045325779, + "grad_norm": 1.8140511512756348, + "learning_rate": 4.562802403102093e-05, + "loss": 1.7615, + "step": 2040 + }, + { + "epoch": 0.9678942398489141, + "grad_norm": 1.6720659732818604, + "learning_rate": 4.5585954221154856e-05, + "loss": 1.8079, + "step": 2050 + }, + { + "epoch": 0.9726156751652503, + "grad_norm": 1.3955872058868408, + "learning_rate": 4.554370255829558e-05, + "loss": 1.8026, + "step": 2060 + }, + { + "epoch": 0.9773371104815864, + "grad_norm": 1.525856375694275, + "learning_rate": 4.550126941568744e-05, + "loss": 1.8584, + "step": 2070 + }, + { + "epoch": 0.9820585457979226, + "grad_norm": 1.526258111000061, + "learning_rate": 4.5458655168177974e-05, + "loss": 1.7369, + "step": 2080 + }, + { + "epoch": 0.9867799811142587, + "grad_norm": 1.4565373659133911, + "learning_rate": 4.541586019221457e-05, + "loss": 1.8483, + "step": 2090 + }, + { + "epoch": 0.9915014164305949, + "grad_norm": 1.767482042312622, + "learning_rate": 4.5372884865841114e-05, + "loss": 1.8187, + "step": 2100 + }, + { + "epoch": 0.9915014164305949, + "eval_loss": 1.8941270112991333, + "eval_runtime": 41.105, + "eval_samples_per_second": 21.701, + "eval_steps_per_second": 2.725, + "step": 2100 + }, + { + "epoch": 0.996222851746931, + "grad_norm": 1.748547077178955, + "learning_rate": 4.532972956869471e-05, + "loss": 1.9258, + "step": 2110 + }, + { + "epoch": 1.0009442870632672, + "grad_norm": 1.0939620733261108, + "learning_rate": 4.528639468200226e-05, + "loss": 1.8346, + "step": 2120 + }, + { + "epoch": 1.0056657223796035, + "grad_norm": 1.4557725191116333, + "learning_rate": 4.524288058857717e-05, + "loss": 1.7275, + "step": 2130 + }, + { + "epoch": 1.0103871576959396, + "grad_norm": 1.2835229635238647, + "learning_rate": 4.51991876728159e-05, + "loss": 1.6719, + "step": 2140 + }, + { + "epoch": 1.0151085930122756, + "grad_norm": 1.5596965551376343, + "learning_rate": 4.515531632069461e-05, + "loss": 1.8067, + "step": 2150 + }, + { + "epoch": 1.019830028328612, + "grad_norm": 1.5260546207427979, + "learning_rate": 4.511126691976574e-05, + "loss": 1.7311, + "step": 2160 + }, + { + "epoch": 1.024551463644948, + "grad_norm": 2.1776397228240967, + "learning_rate": 4.506703985915457e-05, + "loss": 1.8171, + "step": 2170 + }, + { + "epoch": 1.0292728989612843, + "grad_norm": 1.8992706537246704, + "learning_rate": 4.502263552955581e-05, + "loss": 1.6716, + "step": 2180 + }, + { + "epoch": 1.0339943342776203, + "grad_norm": 1.9256116151809692, + "learning_rate": 4.497805432323015e-05, + "loss": 1.7456, + "step": 2190 + }, + { + "epoch": 1.0387157695939566, + "grad_norm": 1.5675586462020874, + "learning_rate": 4.4933296634000734e-05, + "loss": 1.7296, + "step": 2200 + }, + { + "epoch": 1.0387157695939566, + "eval_loss": 1.9005860090255737, + "eval_runtime": 41.0876, + "eval_samples_per_second": 21.71, + "eval_steps_per_second": 2.726, + "step": 2200 + }, + { + "epoch": 1.0434372049102927, + "grad_norm": 1.6989753246307373, + "learning_rate": 4.4888362857249775e-05, + "loss": 1.7202, + "step": 2210 + }, + { + "epoch": 1.048158640226629, + "grad_norm": 1.9172133207321167, + "learning_rate": 4.484325338991499e-05, + "loss": 1.7879, + "step": 2220 + }, + { + "epoch": 1.052880075542965, + "grad_norm": 2.1512560844421387, + "learning_rate": 4.4797968630486135e-05, + "loss": 1.7202, + "step": 2230 + }, + { + "epoch": 1.0576015108593013, + "grad_norm": 2.3440024852752686, + "learning_rate": 4.475250897900144e-05, + "loss": 1.6839, + "step": 2240 + }, + { + "epoch": 1.0623229461756374, + "grad_norm": 1.758452296257019, + "learning_rate": 4.470687483704413e-05, + "loss": 1.7942, + "step": 2250 + }, + { + "epoch": 1.0670443814919737, + "grad_norm": 1.96663498878479, + "learning_rate": 4.466106660773885e-05, + "loss": 1.7255, + "step": 2260 + }, + { + "epoch": 1.0717658168083097, + "grad_norm": 2.048264980316162, + "learning_rate": 4.4615084695748074e-05, + "loss": 1.6271, + "step": 2270 + }, + { + "epoch": 1.0764872521246458, + "grad_norm": 1.7712353467941284, + "learning_rate": 4.456892950726861e-05, + "loss": 1.7444, + "step": 2280 + }, + { + "epoch": 1.081208687440982, + "grad_norm": 2.1486504077911377, + "learning_rate": 4.452260145002791e-05, + "loss": 1.7943, + "step": 2290 + }, + { + "epoch": 1.0859301227573182, + "grad_norm": 1.917653203010559, + "learning_rate": 4.447610093328056e-05, + "loss": 1.6184, + "step": 2300 + }, + { + "epoch": 1.0859301227573182, + "eval_loss": 1.9040113687515259, + "eval_runtime": 41.1418, + "eval_samples_per_second": 21.681, + "eval_steps_per_second": 2.722, + "step": 2300 + }, + { + "epoch": 1.0906515580736544, + "grad_norm": 2.010401725769043, + "learning_rate": 4.4429428367804605e-05, + "loss": 1.7606, + "step": 2310 + }, + { + "epoch": 1.0953729933899905, + "grad_norm": 1.863293170928955, + "learning_rate": 4.438258416589794e-05, + "loss": 1.7419, + "step": 2320 + }, + { + "epoch": 1.1000944287063268, + "grad_norm": 2.40513014793396, + "learning_rate": 4.43355687413747e-05, + "loss": 1.6251, + "step": 2330 + }, + { + "epoch": 1.1048158640226629, + "grad_norm": 1.9660564661026, + "learning_rate": 4.428838250956153e-05, + "loss": 1.7576, + "step": 2340 + }, + { + "epoch": 1.1095372993389991, + "grad_norm": 1.7968907356262207, + "learning_rate": 4.4241025887293976e-05, + "loss": 1.758, + "step": 2350 + }, + { + "epoch": 1.1142587346553352, + "grad_norm": 2.0417661666870117, + "learning_rate": 4.419349929291279e-05, + "loss": 1.759, + "step": 2360 + }, + { + "epoch": 1.1189801699716715, + "grad_norm": 1.7925529479980469, + "learning_rate": 4.414580314626023e-05, + "loss": 1.7568, + "step": 2370 + }, + { + "epoch": 1.1237016052880076, + "grad_norm": 2.122156858444214, + "learning_rate": 4.4097937868676345e-05, + "loss": 1.684, + "step": 2380 + }, + { + "epoch": 1.1284230406043436, + "grad_norm": 2.248425245285034, + "learning_rate": 4.404990388299527e-05, + "loss": 1.6816, + "step": 2390 + }, + { + "epoch": 1.13314447592068, + "grad_norm": 2.147604465484619, + "learning_rate": 4.4001701613541456e-05, + "loss": 1.6973, + "step": 2400 + }, + { + "epoch": 1.13314447592068, + "eval_loss": 1.9056047201156616, + "eval_runtime": 41.0721, + "eval_samples_per_second": 21.718, + "eval_steps_per_second": 2.727, + "step": 2400 + }, + { + "epoch": 1.13314447592068, + "step": 2400, + "total_flos": 9.512959383227597e+17, + "train_loss": 1.9100826263427735, + "train_runtime": 3885.2685, + "train_samples_per_second": 21.805, + "train_steps_per_second": 2.726 + } + ], + "logging_steps": 10, + "max_steps": 10590, + "num_input_tokens_seen": 0, + "num_train_epochs": 5, + "save_steps": 100, + "total_flos": 9.512959383227597e+17, + "train_batch_size": 4, + "trial_name": null, + "trial_params": null +} diff --git a/llama2_13b_peft/topical_chat/training_args.bin b/llama2_13b_peft/topical_chat/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..95709355e0c3be0b2bd443d488fc6d61cc771cfe --- /dev/null +++ b/llama2_13b_peft/topical_chat/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4f7494ca477a770eb8a9f51a202a612e05a71973237d5bb7ae54dd4d1ec4b49d +size 5176 diff --git a/llama2_13b_peft/topical_chat/training_eval_loss.png b/llama2_13b_peft/topical_chat/training_eval_loss.png new file mode 100644 index 0000000000000000000000000000000000000000..aa9bbfc89949f983e02433500c38643c1a322e75 Binary files /dev/null and b/llama2_13b_peft/topical_chat/training_eval_loss.png differ diff --git a/llama2_13b_peft/topical_chat/training_loss.png b/llama2_13b_peft/topical_chat/training_loss.png new file mode 100644 index 0000000000000000000000000000000000000000..50b461e743f86e7e4a795241623d7ccaec963516 Binary files /dev/null and b/llama2_13b_peft/topical_chat/training_loss.png differ diff --git a/llama2_13b_peft/unit_conversion/README.md b/llama2_13b_peft/unit_conversion/README.md new file mode 100644 index 0000000000000000000000000000000000000000..46b96e39eb696192311be74af2fb95e2f55130a8 --- /dev/null +++ b/llama2_13b_peft/unit_conversion/README.md @@ -0,0 +1,77 @@ +--- +license: other +library_name: peft +tags: +- llama-factory +- lora +- generated_from_trainer +base_model: /data1/model/llama2/meta-llama/Llama2-13b +model-index: +- name: unit_conversion_no_sys + results: [] +--- + + + +# unit_conversion_no_sys + +This model is a fine-tuned version of [/data1/model/llama2/meta-llama/Llama2-13b](https://huggingface.co//data1/model/llama2/meta-llama/Llama2-13b) on the unit_conversion_no_sys dataset. +It achieves the following results on the evaluation set: +- Loss: 0.3370 + +## Model description + +More information needed + +## Intended uses & limitations + +More information needed + +## Training and evaluation data + +More information needed + +## Training procedure + +### Training hyperparameters + +The following hyperparameters were used during training: +- learning_rate: 1e-05 +- train_batch_size: 16 +- eval_batch_size: 16 +- seed: 42 +- distributed_type: multi-GPU +- num_devices: 2 +- total_train_batch_size: 32 +- total_eval_batch_size: 32 +- optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08 +- lr_scheduler_type: cosine +- lr_scheduler_warmup_steps: 20 +- num_epochs: 5.0 + +### Training results + +| Training Loss | Epoch | Step | Validation Loss | +|:-------------:|:-----:|:----:|:---------------:| +| 0.5422 | 0.39 | 200 | 0.4792 | +| 0.319 | 0.79 | 400 | 0.4168 | +| 0.3583 | 1.18 | 600 | 0.3873 | +| 0.3048 | 1.57 | 800 | 0.3692 | +| 0.4185 | 1.96 | 1000 | 0.3550 | +| 0.3737 | 2.36 | 1200 | 0.3487 | +| 0.2418 | 2.75 | 1400 | 0.3422 | +| 0.2528 | 3.14 | 1600 | 0.3390 | +| 0.3192 | 3.54 | 1800 | 0.3393 | +| 0.2834 | 3.93 | 2000 | 0.3370 | +| 0.3612 | 4.32 | 2200 | 0.3375 | +| 0.2732 | 4.72 | 2400 | 0.3369 | + + +### Framework versions + +- PEFT 0.9.0 +- Transformers 4.38.2 +- Pytorch 2.2.1 +- Datasets 2.18.0 +- Tokenizers 0.15.2 \ No newline at end of file diff --git a/llama2_13b_peft/unit_conversion/adapter_config.json b/llama2_13b_peft/unit_conversion/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..01231b454b0227fa8e9b0ad973e7c2a10d64504a --- /dev/null +++ b/llama2_13b_peft/unit_conversion/adapter_config.json @@ -0,0 +1,33 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "/data1/model/llama2/meta-llama/Llama2-13b", + "bias": "none", + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 16, + "lora_dropout": 0.0, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "r": 8, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "k_proj", + "q_proj", + "o_proj", + "gate_proj", + "v_proj", + "down_proj", + "up_proj" + ], + "task_type": "CAUSAL_LM", + "use_dora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/llama2_13b_peft/unit_conversion/adapter_model.safetensors b/llama2_13b_peft/unit_conversion/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..2e8902fdfb6542b7f86a5ced874aeae21b553474 --- /dev/null +++ b/llama2_13b_peft/unit_conversion/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1e20cbcb46e97e5f6e3e97eae69742c437853adbc601fc782fb3715d6daabd97 +size 125248064 diff --git a/llama2_13b_peft/unit_conversion/all_results.json b/llama2_13b_peft/unit_conversion/all_results.json new file mode 100644 index 0000000000000000000000000000000000000000..10a1fa634782a325b03e268c73b4254dd7d0462f --- /dev/null +++ b/llama2_13b_peft/unit_conversion/all_results.json @@ -0,0 +1,11 @@ +{ + "epoch": 5.0, + "eval_loss": 0.3370112180709839, + "eval_runtime": 39.0124, + "eval_samples_per_second": 73.643, + "eval_steps_per_second": 2.307, + "train_loss": 0.3501229747105207, + "train_runtime": 3965.184, + "train_samples_per_second": 20.526, + "train_steps_per_second": 0.642 +} \ No newline at end of file diff --git a/llama2_13b_peft/unit_conversion/eval_results.json b/llama2_13b_peft/unit_conversion/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..3026f9de0874f22c6d76caab881b00904d300706 --- /dev/null +++ b/llama2_13b_peft/unit_conversion/eval_results.json @@ -0,0 +1,7 @@ +{ + "epoch": 5.0, + "eval_loss": 0.3370112180709839, + "eval_runtime": 39.0124, + "eval_samples_per_second": 73.643, + "eval_steps_per_second": 2.307 +} \ No newline at end of file diff --git a/llama2_13b_peft/unit_conversion/special_tokens_map.json b/llama2_13b_peft/unit_conversion/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..72ecfeeb7e14d244c936169d2ed139eeae235ef1 --- /dev/null +++ b/llama2_13b_peft/unit_conversion/special_tokens_map.json @@ -0,0 +1,24 @@ +{ + "bos_token": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": "", + "unk_token": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + } +} diff --git a/llama2_13b_peft/unit_conversion/tokenizer.model b/llama2_13b_peft/unit_conversion/tokenizer.model new file mode 100644 index 0000000000000000000000000000000000000000..6c00c742ce03c627d6cd5b795984876fa49fa899 --- /dev/null +++ b/llama2_13b_peft/unit_conversion/tokenizer.model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9e556afd44213b6bd1be2b850ebbbd98f5481437a8021afaf58ee7fb1818d347 +size 499723 diff --git a/llama2_13b_peft/unit_conversion/tokenizer_config.json b/llama2_13b_peft/unit_conversion/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..a40266f39e5b5fed14de34710d35eb9e98d6bdad --- /dev/null +++ b/llama2_13b_peft/unit_conversion/tokenizer_config.json @@ -0,0 +1,45 @@ +{ + "add_bos_token": true, + "add_eos_token": false, + "add_prefix_space": true, + "added_tokens_decoder": { + "0": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "1": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "2": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + } + }, + "bos_token": "", + "chat_template": "{% if messages[0]['role'] == 'system' %}{% set system_message = messages[0]['content'] %}{% endif %}{% if system_message is defined %}{{ system_message }}{% endif %}{% for message in messages %}{% set content = message['content'] %}{% if message['role'] == 'user' %}{{ content }}{% elif message['role'] == 'assistant' %}{{ content + '\\n' }}{% endif %}{% endfor %}", + "clean_up_tokenization_spaces": false, + "eos_token": "", + "legacy": true, + "model_max_length": 1000000000000000019884624838656, + "pad_token": "", + "padding_side": "right", + "sp_model_kwargs": {}, + "spaces_between_special_tokens": false, + "split_special_tokens": false, + "tokenizer_class": "LlamaTokenizer", + "unk_token": "", + "use_default_system_prompt": false +} diff --git a/llama2_13b_peft/unit_conversion/train_results.json b/llama2_13b_peft/unit_conversion/train_results.json new file mode 100644 index 0000000000000000000000000000000000000000..74b74ee7b9290e812e3805228f50b477e12286d6 --- /dev/null +++ b/llama2_13b_peft/unit_conversion/train_results.json @@ -0,0 +1,7 @@ +{ + "epoch": 5.0, + "train_loss": 0.3501229747105207, + "train_runtime": 3965.184, + "train_samples_per_second": 20.526, + "train_steps_per_second": 0.642 +} \ No newline at end of file diff --git a/llama2_13b_peft/unit_conversion/trainer_log.jsonl b/llama2_13b_peft/unit_conversion/trainer_log.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..d2fe40a29321770c193fe31d8f98184fc16f85df --- /dev/null +++ b/llama2_13b_peft/unit_conversion/trainer_log.jsonl @@ -0,0 +1,268 @@ +{"current_steps": 10, "total_steps": 2545, "loss": 1.6608, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 5e-06, "epoch": 0.02, "percentage": 0.39, "elapsed_time": "0:00:17", "remaining_time": "1:12:00"} +{"current_steps": 20, "total_steps": 2545, "loss": 1.7201, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1e-05, "epoch": 0.04, "percentage": 0.79, "elapsed_time": "0:00:30", "remaining_time": "1:04:46"} +{"current_steps": 30, "total_steps": 2545, "loss": 1.6975, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.99961299962427e-06, "epoch": 0.06, "percentage": 1.18, "elapsed_time": "0:00:46", "remaining_time": "1:04:46"} +{"current_steps": 40, "total_steps": 2545, "loss": 1.3794, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.998452058404793e-06, "epoch": 0.08, "percentage": 1.57, "elapsed_time": "0:00:58", "remaining_time": "1:00:42"} +{"current_steps": 50, "total_steps": 2545, "loss": 1.0864, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.996517356055444e-06, "epoch": 0.1, "percentage": 1.96, "elapsed_time": "0:01:11", "remaining_time": "0:59:07"} +{"current_steps": 60, "total_steps": 2545, "loss": 0.6983, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.99380919206844e-06, "epoch": 0.12, "percentage": 2.36, "elapsed_time": "0:01:25", "remaining_time": "0:59:04"} +{"current_steps": 70, "total_steps": 2545, "loss": 0.564, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.990327985667972e-06, "epoch": 0.14, "percentage": 2.75, "elapsed_time": "0:01:38", "remaining_time": "0:57:57"} +{"current_steps": 80, "total_steps": 2545, "loss": 0.5864, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.986074275745314e-06, "epoch": 0.16, "percentage": 3.14, "elapsed_time": "0:01:50", "remaining_time": "0:56:48"} +{"current_steps": 90, "total_steps": 2545, "loss": 0.4707, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.981048720775401e-06, "epoch": 0.18, "percentage": 3.54, "elapsed_time": "0:02:03", "remaining_time": "0:56:14"} +{"current_steps": 100, "total_steps": 2545, "loss": 0.5636, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.975252098714898e-06, "epoch": 0.2, "percentage": 3.93, "elapsed_time": "0:02:16", "remaining_time": "0:55:34"} +{"current_steps": 110, "total_steps": 2545, "loss": 0.4544, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.968685306881772e-06, "epoch": 0.22, "percentage": 4.32, "elapsed_time": "0:02:30", "remaining_time": "0:55:21"} +{"current_steps": 120, "total_steps": 2545, "loss": 0.472, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.961349361816384e-06, "epoch": 0.24, "percentage": 4.72, "elapsed_time": "0:02:42", "remaining_time": "0:54:47"} +{"current_steps": 130, "total_steps": 2545, "loss": 0.4864, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.953245399124133e-06, "epoch": 0.26, "percentage": 5.11, "elapsed_time": "0:03:01", "remaining_time": "0:56:07"} +{"current_steps": 140, "total_steps": 2545, "loss": 0.4197, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.94437467329966e-06, "epoch": 0.28, "percentage": 5.5, "elapsed_time": "0:03:16", "remaining_time": "0:56:16"} +{"current_steps": 150, "total_steps": 2545, "loss": 0.4712, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.934738557532663e-06, "epoch": 0.29, "percentage": 5.89, "elapsed_time": "0:03:30", "remaining_time": "0:55:54"} +{"current_steps": 160, "total_steps": 2545, "loss": 0.3896, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.924338543495302e-06, "epoch": 0.31, "percentage": 6.29, "elapsed_time": "0:03:45", "remaining_time": "0:55:59"} +{"current_steps": 170, "total_steps": 2545, "loss": 0.5073, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.91317624111132e-06, "epoch": 0.33, "percentage": 6.68, "elapsed_time": "0:03:57", "remaining_time": "0:55:19"} +{"current_steps": 180, "total_steps": 2545, "loss": 0.4667, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.9012533783068e-06, "epoch": 0.35, "percentage": 7.07, "elapsed_time": "0:04:09", "remaining_time": "0:54:40"} +{"current_steps": 190, "total_steps": 2545, "loss": 0.4281, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.888571800742694e-06, "epoch": 0.37, "percentage": 7.47, "elapsed_time": "0:04:24", "remaining_time": "0:54:42"} +{"current_steps": 200, "total_steps": 2545, "loss": 0.5422, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.87513347152912e-06, "epoch": 0.39, "percentage": 7.86, "elapsed_time": "0:04:41", "remaining_time": "0:54:57"} +{"current_steps": 200, "total_steps": 2545, "loss": null, "eval_loss": 0.47923311591148376, "predict_loss": null, "reward": null, "learning_rate": null, "epoch": 0.39, "percentage": 7.86, "elapsed_time": "0:04:41", "remaining_time": "0:54:57"} +{"current_steps": 210, "total_steps": 2545, "loss": 0.4597, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.860940470921457e-06, "epoch": 0.41, "percentage": 8.25, "elapsed_time": "0:05:33", "remaining_time": "1:01:49"} +{"current_steps": 220, "total_steps": 2545, "loss": 0.4876, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.845994995998332e-06, "epoch": 0.43, "percentage": 8.64, "elapsed_time": "0:05:44", "remaining_time": "1:00:39"} +{"current_steps": 230, "total_steps": 2545, "loss": 0.5003, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.830299360321509e-06, "epoch": 0.45, "percentage": 9.04, "elapsed_time": "0:05:55", "remaining_time": "0:59:42"} +{"current_steps": 240, "total_steps": 2545, "loss": 0.3836, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.813855993577753e-06, "epoch": 0.47, "percentage": 9.43, "elapsed_time": "0:06:11", "remaining_time": "0:59:24"} +{"current_steps": 250, "total_steps": 2545, "loss": 0.4631, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.796667441202705e-06, "epoch": 0.49, "percentage": 9.82, "elapsed_time": "0:06:24", "remaining_time": "0:58:50"} +{"current_steps": 260, "total_steps": 2545, "loss": 0.4101, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.778736363986854e-06, "epoch": 0.51, "percentage": 10.22, "elapsed_time": "0:06:36", "remaining_time": "0:58:01"} +{"current_steps": 270, "total_steps": 2545, "loss": 0.4622, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.76006553766365e-06, "epoch": 0.53, "percentage": 10.61, "elapsed_time": "0:06:50", "remaining_time": "0:57:35"} +{"current_steps": 280, "total_steps": 2545, "loss": 0.4378, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.740657852479815e-06, "epoch": 0.55, "percentage": 11.0, "elapsed_time": "0:07:05", "remaining_time": "0:57:20"} +{"current_steps": 290, "total_steps": 2545, "loss": 0.3582, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.72051631274793e-06, "epoch": 0.57, "percentage": 11.39, "elapsed_time": "0:07:19", "remaining_time": "0:56:56"} +{"current_steps": 300, "total_steps": 2545, "loss": 0.45, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.699644036381374e-06, "epoch": 0.59, "percentage": 11.79, "elapsed_time": "0:07:31", "remaining_time": "0:56:21"} +{"current_steps": 310, "total_steps": 2545, "loss": 0.3289, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.678044254411665e-06, "epoch": 0.61, "percentage": 12.18, "elapsed_time": "0:07:46", "remaining_time": "0:55:59"} +{"current_steps": 320, "total_steps": 2545, "loss": 0.3618, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.655720310488298e-06, "epoch": 0.63, "percentage": 12.57, "elapsed_time": "0:07:57", "remaining_time": "0:55:21"} +{"current_steps": 330, "total_steps": 2545, "loss": 0.3579, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.632675660361148e-06, "epoch": 0.65, "percentage": 12.97, "elapsed_time": "0:08:11", "remaining_time": "0:55:02"} +{"current_steps": 340, "total_steps": 2545, "loss": 0.4039, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.60891387134552e-06, "epoch": 0.67, "percentage": 13.36, "elapsed_time": "0:08:26", "remaining_time": "0:54:46"} +{"current_steps": 350, "total_steps": 2545, "loss": 0.4668, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.58443862176992e-06, "epoch": 0.69, "percentage": 13.75, "elapsed_time": "0:08:40", "remaining_time": "0:54:26"} +{"current_steps": 360, "total_steps": 2545, "loss": 0.4174, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.559253700406663e-06, "epoch": 0.71, "percentage": 14.15, "elapsed_time": "0:08:53", "remaining_time": "0:54:00"} +{"current_steps": 370, "total_steps": 2545, "loss": 0.4081, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.533363005885362e-06, "epoch": 0.73, "percentage": 14.54, "elapsed_time": "0:09:07", "remaining_time": "0:53:37"} +{"current_steps": 380, "total_steps": 2545, "loss": 0.3972, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.506770546089422e-06, "epoch": 0.75, "percentage": 14.93, "elapsed_time": "0:09:19", "remaining_time": "0:53:10"} +{"current_steps": 390, "total_steps": 2545, "loss": 0.3379, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.479480437535613e-06, "epoch": 0.77, "percentage": 15.32, "elapsed_time": "0:09:36", "remaining_time": "0:53:03"} +{"current_steps": 400, "total_steps": 2545, "loss": 0.319, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.45149690473684e-06, "epoch": 0.79, "percentage": 15.72, "elapsed_time": "0:09:47", "remaining_time": "0:52:29"} +{"current_steps": 400, "total_steps": 2545, "loss": null, "eval_loss": 0.41680005192756653, "predict_loss": null, "reward": null, "learning_rate": null, "epoch": 0.79, "percentage": 15.72, "elapsed_time": "0:09:47", "remaining_time": "0:52:29"} +{"current_steps": 410, "total_steps": 2545, "loss": 0.454, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.422824279548189e-06, "epoch": 0.81, "percentage": 16.11, "elapsed_time": "0:10:38", "remaining_time": "0:55:27"} +{"current_steps": 420, "total_steps": 2545, "loss": 0.3599, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.393467000496345e-06, "epoch": 0.83, "percentage": 16.5, "elapsed_time": "0:10:53", "remaining_time": "0:55:07"} +{"current_steps": 430, "total_steps": 2545, "loss": 0.3763, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.36342961209252e-06, "epoch": 0.84, "percentage": 16.9, "elapsed_time": "0:11:07", "remaining_time": "0:54:43"} +{"current_steps": 440, "total_steps": 2545, "loss": 0.3931, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.332716764128952e-06, "epoch": 0.86, "percentage": 17.29, "elapsed_time": "0:11:22", "remaining_time": "0:54:22"} +{"current_steps": 450, "total_steps": 2545, "loss": 0.3708, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.301333210959123e-06, "epoch": 0.88, "percentage": 17.68, "elapsed_time": "0:11:35", "remaining_time": "0:53:56"} +{"current_steps": 460, "total_steps": 2545, "loss": 0.3863, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.26928381076178e-06, "epoch": 0.9, "percentage": 18.07, "elapsed_time": "0:11:48", "remaining_time": "0:53:32"} +{"current_steps": 470, "total_steps": 2545, "loss": 0.3334, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.236573524788888e-06, "epoch": 0.92, "percentage": 18.47, "elapsed_time": "0:12:01", "remaining_time": "0:53:06"} +{"current_steps": 480, "total_steps": 2545, "loss": 0.4054, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.203207416597635e-06, "epoch": 0.94, "percentage": 18.86, "elapsed_time": "0:12:15", "remaining_time": "0:52:46"} +{"current_steps": 490, "total_steps": 2545, "loss": 0.3992, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.169190651266582e-06, "epoch": 0.96, "percentage": 19.25, "elapsed_time": "0:12:29", "remaining_time": "0:52:22"} +{"current_steps": 500, "total_steps": 2545, "loss": 0.3113, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.134528494596116e-06, "epoch": 0.98, "percentage": 19.65, "elapsed_time": "0:12:42", "remaining_time": "0:51:57"} +{"current_steps": 510, "total_steps": 2545, "loss": 0.4078, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.099226312293296e-06, "epoch": 1.0, "percentage": 20.04, "elapsed_time": "0:12:55", "remaining_time": "0:51:33"} +{"current_steps": 520, "total_steps": 2545, "loss": 0.3958, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.063289569141251e-06, "epoch": 1.02, "percentage": 20.43, "elapsed_time": "0:13:09", "remaining_time": "0:51:15"} +{"current_steps": 530, "total_steps": 2545, "loss": 0.3513, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.026723828153224e-06, "epoch": 1.04, "percentage": 20.83, "elapsed_time": "0:13:25", "remaining_time": "0:51:00"} +{"current_steps": 540, "total_steps": 2545, "loss": 0.3857, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 8.98953474971141e-06, "epoch": 1.06, "percentage": 21.22, "elapsed_time": "0:13:38", "remaining_time": "0:50:37"} +{"current_steps": 550, "total_steps": 2545, "loss": 0.3646, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 8.951728090690743e-06, "epoch": 1.08, "percentage": 21.61, "elapsed_time": "0:13:50", "remaining_time": "0:50:13"} +{"current_steps": 560, "total_steps": 2545, "loss": 0.3052, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 8.913309703567722e-06, "epoch": 1.1, "percentage": 22.0, "elapsed_time": "0:14:06", "remaining_time": "0:50:00"} +{"current_steps": 570, "total_steps": 2545, "loss": 0.3649, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 8.87428553551445e-06, "epoch": 1.12, "percentage": 22.4, "elapsed_time": "0:14:19", "remaining_time": "0:49:38"} +{"current_steps": 580, "total_steps": 2545, "loss": 0.3371, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 8.834661627478003e-06, "epoch": 1.14, "percentage": 22.79, "elapsed_time": "0:14:32", "remaining_time": "0:49:17"} +{"current_steps": 590, "total_steps": 2545, "loss": 0.346, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 8.794444113245302e-06, "epoch": 1.16, "percentage": 23.18, "elapsed_time": "0:14:45", "remaining_time": "0:48:52"} +{"current_steps": 600, "total_steps": 2545, "loss": 0.3583, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 8.753639218493594e-06, "epoch": 1.18, "percentage": 23.58, "elapsed_time": "0:14:57", "remaining_time": "0:48:30"} +{"current_steps": 600, "total_steps": 2545, "loss": null, "eval_loss": 0.38733917474746704, "predict_loss": null, "reward": null, "learning_rate": null, "epoch": 1.18, "percentage": 23.58, "elapsed_time": "0:14:57", "remaining_time": "0:48:30"} +{"current_steps": 610, "total_steps": 2545, "loss": 0.3845, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 8.712253259826719e-06, "epoch": 1.2, "percentage": 23.97, "elapsed_time": "0:15:52", "remaining_time": "0:50:19"} +{"current_steps": 620, "total_steps": 2545, "loss": 0.431, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 8.670292643797302e-06, "epoch": 1.22, "percentage": 24.36, "elapsed_time": "0:16:06", "remaining_time": "0:49:59"} +{"current_steps": 630, "total_steps": 2545, "loss": 0.3512, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 8.627763865915005e-06, "epoch": 1.24, "percentage": 24.75, "elapsed_time": "0:16:20", "remaining_time": "0:49:38"} +{"current_steps": 640, "total_steps": 2545, "loss": 0.3475, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 8.58467350964104e-06, "epoch": 1.26, "percentage": 25.15, "elapsed_time": "0:16:33", "remaining_time": "0:49:17"} +{"current_steps": 650, "total_steps": 2545, "loss": 0.3372, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 8.541028245369033e-06, "epoch": 1.28, "percentage": 25.54, "elapsed_time": "0:16:48", "remaining_time": "0:49:00"} +{"current_steps": 660, "total_steps": 2545, "loss": 0.3133, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 8.496834829392454e-06, "epoch": 1.3, "percentage": 25.93, "elapsed_time": "0:16:59", "remaining_time": "0:48:33"} +{"current_steps": 670, "total_steps": 2545, "loss": 0.3274, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 8.452100102858734e-06, "epoch": 1.32, "percentage": 26.33, "elapsed_time": "0:17:16", "remaining_time": "0:48:19"} +{"current_steps": 680, "total_steps": 2545, "loss": 0.3771, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 8.40683099071027e-06, "epoch": 1.34, "percentage": 26.72, "elapsed_time": "0:17:29", "remaining_time": "0:47:57"} +{"current_steps": 690, "total_steps": 2545, "loss": 0.3636, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 8.361034500612421e-06, "epoch": 1.36, "percentage": 27.11, "elapsed_time": "0:17:45", "remaining_time": "0:47:44"} +{"current_steps": 700, "total_steps": 2545, "loss": 0.3026, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 8.31471772186874e-06, "epoch": 1.38, "percentage": 27.5, "elapsed_time": "0:17:59", "remaining_time": "0:47:24"} +{"current_steps": 710, "total_steps": 2545, "loss": 0.3075, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 8.267887824323537e-06, "epoch": 1.39, "percentage": 27.9, "elapsed_time": "0:18:13", "remaining_time": "0:47:05"} +{"current_steps": 720, "total_steps": 2545, "loss": 0.2668, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 8.22055205725199e-06, "epoch": 1.41, "percentage": 28.29, "elapsed_time": "0:18:25", "remaining_time": "0:46:42"} +{"current_steps": 730, "total_steps": 2545, "loss": 0.2809, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 8.172717748237955e-06, "epoch": 1.43, "percentage": 28.68, "elapsed_time": "0:18:40", "remaining_time": "0:46:25"} +{"current_steps": 740, "total_steps": 2545, "loss": 0.2968, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 8.124392302039658e-06, "epoch": 1.45, "percentage": 29.08, "elapsed_time": "0:18:54", "remaining_time": "0:46:07"} +{"current_steps": 750, "total_steps": 2545, "loss": 0.3148, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 8.075583199443431e-06, "epoch": 1.47, "percentage": 29.47, "elapsed_time": "0:19:06", "remaining_time": "0:45:44"} +{"current_steps": 760, "total_steps": 2545, "loss": 0.3266, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 8.026297996105694e-06, "epoch": 1.49, "percentage": 29.86, "elapsed_time": "0:19:20", "remaining_time": "0:45:26"} +{"current_steps": 770, "total_steps": 2545, "loss": 0.3143, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 7.97654432138333e-06, "epoch": 1.51, "percentage": 30.26, "elapsed_time": "0:19:34", "remaining_time": "0:45:06"} +{"current_steps": 780, "total_steps": 2545, "loss": 0.3853, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 7.926329877152665e-06, "epoch": 1.53, "percentage": 30.65, "elapsed_time": "0:19:46", "remaining_time": "0:44:43"} +{"current_steps": 790, "total_steps": 2545, "loss": 0.3196, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 7.875662436617211e-06, "epoch": 1.55, "percentage": 31.04, "elapsed_time": "0:20:01", "remaining_time": "0:44:28"} +{"current_steps": 800, "total_steps": 2545, "loss": 0.3048, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 7.824549843104378e-06, "epoch": 1.57, "percentage": 31.43, "elapsed_time": "0:20:12", "remaining_time": "0:44:05"} +{"current_steps": 800, "total_steps": 2545, "loss": null, "eval_loss": 0.369180291891098, "predict_loss": null, "reward": null, "learning_rate": null, "epoch": 1.57, "percentage": 31.43, "elapsed_time": "0:20:12", "remaining_time": "0:44:05"} +{"current_steps": 810, "total_steps": 2545, "loss": 0.3529, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 7.773000008851323e-06, "epoch": 1.59, "percentage": 31.83, "elapsed_time": "0:21:08", "remaining_time": "0:45:16"} +{"current_steps": 820, "total_steps": 2545, "loss": 0.4047, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 7.721020913780137e-06, "epoch": 1.61, "percentage": 32.22, "elapsed_time": "0:21:20", "remaining_time": "0:44:52"} +{"current_steps": 830, "total_steps": 2545, "loss": 0.3268, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 7.668620604262548e-06, "epoch": 1.63, "percentage": 32.61, "elapsed_time": "0:21:32", "remaining_time": "0:44:31"} +{"current_steps": 840, "total_steps": 2545, "loss": 0.3198, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 7.615807191874349e-06, "epoch": 1.65, "percentage": 33.01, "elapsed_time": "0:21:45", "remaining_time": "0:44:09"} +{"current_steps": 850, "total_steps": 2545, "loss": 0.3856, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 7.56258885213971e-06, "epoch": 1.67, "percentage": 33.4, "elapsed_time": "0:21:58", "remaining_time": "0:43:48"} +{"current_steps": 860, "total_steps": 2545, "loss": 0.2915, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 7.50897382326562e-06, "epoch": 1.69, "percentage": 33.79, "elapsed_time": "0:22:11", "remaining_time": "0:43:28"} +{"current_steps": 870, "total_steps": 2545, "loss": 0.2955, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 7.454970404866612e-06, "epoch": 1.71, "percentage": 34.18, "elapsed_time": "0:22:27", "remaining_time": "0:43:14"} +{"current_steps": 880, "total_steps": 2545, "loss": 0.3256, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 7.400586956679965e-06, "epoch": 1.73, "percentage": 34.58, "elapsed_time": "0:22:41", "remaining_time": "0:42:56"} +{"current_steps": 890, "total_steps": 2545, "loss": 0.3329, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 7.345831897271636e-06, "epoch": 1.75, "percentage": 34.97, "elapsed_time": "0:22:55", "remaining_time": "0:42:37"} +{"current_steps": 900, "total_steps": 2545, "loss": 0.4319, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 7.2907137027330455e-06, "epoch": 1.77, "percentage": 35.36, "elapsed_time": "0:23:07", "remaining_time": "0:42:16"} +{"current_steps": 910, "total_steps": 2545, "loss": 0.3695, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 7.235240905368997e-06, "epoch": 1.79, "percentage": 35.76, "elapsed_time": "0:23:23", "remaining_time": "0:42:00"} +{"current_steps": 920, "total_steps": 2545, "loss": 0.2896, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 7.179422092376856e-06, "epoch": 1.81, "percentage": 36.15, "elapsed_time": "0:23:33", "remaining_time": "0:41:37"} +{"current_steps": 930, "total_steps": 2545, "loss": 0.3428, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 7.123265904517264e-06, "epoch": 1.83, "percentage": 36.54, "elapsed_time": "0:23:45", "remaining_time": "0:41:15"} +{"current_steps": 940, "total_steps": 2545, "loss": 0.3375, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 7.066781034776546e-06, "epoch": 1.85, "percentage": 36.94, "elapsed_time": "0:23:58", "remaining_time": "0:40:57"} +{"current_steps": 950, "total_steps": 2545, "loss": 0.3072, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 7.009976227021018e-06, "epoch": 1.87, "percentage": 37.33, "elapsed_time": "0:24:12", "remaining_time": "0:40:38"} +{"current_steps": 960, "total_steps": 2545, "loss": 0.3428, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 6.952860274643466e-06, "epoch": 1.89, "percentage": 37.72, "elapsed_time": "0:24:26", "remaining_time": "0:40:20"} +{"current_steps": 970, "total_steps": 2545, "loss": 0.374, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 6.895442019201898e-06, "epoch": 1.91, "percentage": 38.11, "elapsed_time": "0:24:41", "remaining_time": "0:40:05"} +{"current_steps": 980, "total_steps": 2545, "loss": 0.3584, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 6.837730349050886e-06, "epoch": 1.93, "percentage": 38.51, "elapsed_time": "0:24:54", "remaining_time": "0:39:46"} +{"current_steps": 990, "total_steps": 2545, "loss": 0.2854, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 6.7797341979656454e-06, "epoch": 1.94, "percentage": 38.9, "elapsed_time": "0:25:08", "remaining_time": "0:39:28"} +{"current_steps": 1000, "total_steps": 2545, "loss": 0.4185, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 6.721462543759077e-06, "epoch": 1.96, "percentage": 39.29, "elapsed_time": "0:25:23", "remaining_time": "0:39:13"} +{"current_steps": 1000, "total_steps": 2545, "loss": null, "eval_loss": 0.35499048233032227, "predict_loss": null, "reward": null, "learning_rate": null, "epoch": 1.96, "percentage": 39.29, "elapsed_time": "0:25:23", "remaining_time": "0:39:13"} +{"current_steps": 1010, "total_steps": 2545, "loss": 0.3806, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 6.6629244068920155e-06, "epoch": 1.98, "percentage": 39.69, "elapsed_time": "0:26:15", "remaining_time": "0:39:53"} +{"current_steps": 1020, "total_steps": 2545, "loss": 0.2947, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 6.6041288490768385e-06, "epoch": 2.0, "percentage": 40.08, "elapsed_time": "0:26:31", "remaining_time": "0:39:39"} +{"current_steps": 1030, "total_steps": 2545, "loss": 0.2732, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 6.545084971874738e-06, "epoch": 2.02, "percentage": 40.47, "elapsed_time": "0:26:43", "remaining_time": "0:39:18"} +{"current_steps": 1040, "total_steps": 2545, "loss": 0.2952, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 6.485801915286776e-06, "epoch": 2.04, "percentage": 40.86, "elapsed_time": "0:26:59", "remaining_time": "0:39:03"} +{"current_steps": 1050, "total_steps": 2545, "loss": 0.2251, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 6.426288856339022e-06, "epoch": 2.06, "percentage": 41.26, "elapsed_time": "0:27:13", "remaining_time": "0:38:46"} +{"current_steps": 1060, "total_steps": 2545, "loss": 0.2822, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 6.366555007661949e-06, "epoch": 2.08, "percentage": 41.65, "elapsed_time": "0:27:28", "remaining_time": "0:38:29"} +{"current_steps": 1070, "total_steps": 2545, "loss": 0.278, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 6.306609616064304e-06, "epoch": 2.1, "percentage": 42.04, "elapsed_time": "0:27:41", "remaining_time": "0:38:10"} +{"current_steps": 1080, "total_steps": 2545, "loss": 0.2213, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 6.246461961101721e-06, "epoch": 2.12, "percentage": 42.44, "elapsed_time": "0:27:54", "remaining_time": "0:37:50"} +{"current_steps": 1090, "total_steps": 2545, "loss": 0.3001, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 6.186121353640223e-06, "epoch": 2.14, "percentage": 42.83, "elapsed_time": "0:28:09", "remaining_time": "0:37:35"} +{"current_steps": 1100, "total_steps": 2545, "loss": 0.3357, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 6.125597134414919e-06, "epoch": 2.16, "percentage": 43.22, "elapsed_time": "0:28:23", "remaining_time": "0:37:17"} +{"current_steps": 1110, "total_steps": 2545, "loss": 0.3637, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 6.064898672584036e-06, "epoch": 2.18, "percentage": 43.61, "elapsed_time": "0:28:36", "remaining_time": "0:36:58"} +{"current_steps": 1120, "total_steps": 2545, "loss": 0.4342, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 6.004035364278593e-06, "epoch": 2.2, "percentage": 44.01, "elapsed_time": "0:28:49", "remaining_time": "0:36:40"} +{"current_steps": 1130, "total_steps": 2545, "loss": 0.3534, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 5.94301663114786e-06, "epoch": 2.22, "percentage": 44.4, "elapsed_time": "0:29:02", "remaining_time": "0:36:21"} +{"current_steps": 1140, "total_steps": 2545, "loss": 0.271, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 5.881851918900897e-06, "epoch": 2.24, "percentage": 44.79, "elapsed_time": "0:29:14", "remaining_time": "0:36:02"} +{"current_steps": 1150, "total_steps": 2545, "loss": 0.3774, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 5.820550695844354e-06, "epoch": 2.26, "percentage": 45.19, "elapsed_time": "0:29:26", "remaining_time": "0:35:43"} +{"current_steps": 1160, "total_steps": 2545, "loss": 0.2967, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 5.759122451416769e-06, "epoch": 2.28, "percentage": 45.58, "elapsed_time": "0:29:39", "remaining_time": "0:35:24"} +{"current_steps": 1170, "total_steps": 2545, "loss": 0.2556, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 5.697576694719616e-06, "epoch": 2.3, "percentage": 45.97, "elapsed_time": "0:29:57", "remaining_time": "0:35:12"} +{"current_steps": 1180, "total_steps": 2545, "loss": 0.3435, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 5.635922953045278e-06, "epoch": 2.32, "percentage": 46.37, "elapsed_time": "0:30:10", "remaining_time": "0:34:54"} +{"current_steps": 1190, "total_steps": 2545, "loss": 0.3064, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 5.574170770402236e-06, "epoch": 2.34, "percentage": 46.76, "elapsed_time": "0:30:23", "remaining_time": "0:34:36"} +{"current_steps": 1200, "total_steps": 2545, "loss": 0.3737, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 5.512329706037643e-06, "epoch": 2.36, "percentage": 47.15, "elapsed_time": "0:30:35", "remaining_time": "0:34:16"} +{"current_steps": 1200, "total_steps": 2545, "loss": null, "eval_loss": 0.34871676564216614, "predict_loss": null, "reward": null, "learning_rate": null, "epoch": 2.36, "percentage": 47.15, "elapsed_time": "0:30:35", "remaining_time": "0:34:16"} +{"current_steps": 1210, "total_steps": 2545, "loss": 0.3051, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 5.4504093329575546e-06, "epoch": 2.38, "percentage": 47.54, "elapsed_time": "0:31:27", "remaining_time": "0:34:42"} +{"current_steps": 1220, "total_steps": 2545, "loss": 0.3564, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 5.388419236445033e-06, "epoch": 2.4, "percentage": 47.94, "elapsed_time": "0:31:40", "remaining_time": "0:34:23"} +{"current_steps": 1230, "total_steps": 2545, "loss": 0.3243, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 5.3263690125763316e-06, "epoch": 2.42, "percentage": 48.33, "elapsed_time": "0:31:55", "remaining_time": "0:34:07"} +{"current_steps": 1240, "total_steps": 2545, "loss": 0.2923, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 5.264268266735432e-06, "epoch": 2.44, "percentage": 48.72, "elapsed_time": "0:32:08", "remaining_time": "0:33:49"} +{"current_steps": 1250, "total_steps": 2545, "loss": 0.3074, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 5.202126612127125e-06, "epoch": 2.46, "percentage": 49.12, "elapsed_time": "0:32:22", "remaining_time": "0:33:32"} +{"current_steps": 1260, "total_steps": 2545, "loss": 0.3111, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 5.139953668288883e-06, "epoch": 2.48, "percentage": 49.51, "elapsed_time": "0:32:35", "remaining_time": "0:33:14"} +{"current_steps": 1270, "total_steps": 2545, "loss": 0.3539, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 5.077759059601756e-06, "epoch": 2.5, "percentage": 49.9, "elapsed_time": "0:32:48", "remaining_time": "0:32:56"} +{"current_steps": 1280, "total_steps": 2545, "loss": 0.2223, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 5.015552413800514e-06, "epoch": 2.51, "percentage": 50.29, "elapsed_time": "0:33:03", "remaining_time": "0:32:40"} +{"current_steps": 1290, "total_steps": 2545, "loss": 0.2714, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.953343360483281e-06, "epoch": 2.53, "percentage": 50.69, "elapsed_time": "0:33:16", "remaining_time": "0:32:22"} +{"current_steps": 1300, "total_steps": 2545, "loss": 0.2568, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.8911415296208555e-06, "epoch": 2.55, "percentage": 51.08, "elapsed_time": "0:33:33", "remaining_time": "0:32:08"} +{"current_steps": 1310, "total_steps": 2545, "loss": 0.2363, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.828956550066006e-06, "epoch": 2.57, "percentage": 51.47, "elapsed_time": "0:33:48", "remaining_time": "0:31:52"} +{"current_steps": 1320, "total_steps": 2545, "loss": 0.4116, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.766798048062913e-06, "epoch": 2.59, "percentage": 51.87, "elapsed_time": "0:34:02", "remaining_time": "0:31:35"} +{"current_steps": 1330, "total_steps": 2545, "loss": 0.3128, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.704675645757028e-06, "epoch": 2.61, "percentage": 52.26, "elapsed_time": "0:34:18", "remaining_time": "0:31:20"} +{"current_steps": 1340, "total_steps": 2545, "loss": 0.2368, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.642598959705565e-06, "epoch": 2.63, "percentage": 52.65, "elapsed_time": "0:34:31", "remaining_time": "0:31:02"} +{"current_steps": 1350, "total_steps": 2545, "loss": 0.3453, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.580577599388854e-06, "epoch": 2.65, "percentage": 53.05, "elapsed_time": "0:34:44", "remaining_time": "0:30:45"} +{"current_steps": 1360, "total_steps": 2545, "loss": 0.2893, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.518621165722794e-06, "epoch": 2.67, "percentage": 53.44, "elapsed_time": "0:34:57", "remaining_time": "0:30:27"} +{"current_steps": 1370, "total_steps": 2545, "loss": 0.3422, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.456739249572628e-06, "epoch": 2.69, "percentage": 53.83, "elapsed_time": "0:35:12", "remaining_time": "0:30:11"} +{"current_steps": 1380, "total_steps": 2545, "loss": 0.3076, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.394941430268275e-06, "epoch": 2.71, "percentage": 54.22, "elapsed_time": "0:35:24", "remaining_time": "0:29:53"} +{"current_steps": 1390, "total_steps": 2545, "loss": 0.3504, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.333237274121453e-06, "epoch": 2.73, "percentage": 54.62, "elapsed_time": "0:35:38", "remaining_time": "0:29:36"} +{"current_steps": 1400, "total_steps": 2545, "loss": 0.2418, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.271636332944806e-06, "epoch": 2.75, "percentage": 55.01, "elapsed_time": "0:35:51", "remaining_time": "0:29:19"} +{"current_steps": 1400, "total_steps": 2545, "loss": null, "eval_loss": 0.3422459363937378, "predict_loss": null, "reward": null, "learning_rate": null, "epoch": 2.75, "percentage": 55.01, "elapsed_time": "0:35:51", "remaining_time": "0:29:19"} +{"current_steps": 1410, "total_steps": 2545, "loss": 0.3114, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.210148142573287e-06, "epoch": 2.77, "percentage": 55.4, "elapsed_time": "0:36:45", "remaining_time": "0:29:35"} +{"current_steps": 1420, "total_steps": 2545, "loss": 0.3683, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.148782221388007e-06, "epoch": 2.79, "percentage": 55.8, "elapsed_time": "0:37:00", "remaining_time": "0:29:18"} +{"current_steps": 1430, "total_steps": 2545, "loss": 0.3036, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.087548068842787e-06, "epoch": 2.81, "percentage": 56.19, "elapsed_time": "0:37:13", "remaining_time": "0:29:01"} +{"current_steps": 1440, "total_steps": 2545, "loss": 0.3015, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.026455163993645e-06, "epoch": 2.83, "percentage": 56.58, "elapsed_time": "0:37:26", "remaining_time": "0:28:44"} +{"current_steps": 1450, "total_steps": 2545, "loss": 0.2794, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.965512964031435e-06, "epoch": 2.85, "percentage": 56.97, "elapsed_time": "0:37:40", "remaining_time": "0:28:27"} +{"current_steps": 1460, "total_steps": 2545, "loss": 0.2911, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.904730902817867e-06, "epoch": 2.87, "percentage": 57.37, "elapsed_time": "0:37:52", "remaining_time": "0:28:08"} +{"current_steps": 1470, "total_steps": 2545, "loss": 0.2511, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.844118389425154e-06, "epoch": 2.89, "percentage": 57.76, "elapsed_time": "0:38:07", "remaining_time": "0:27:52"} +{"current_steps": 1480, "total_steps": 2545, "loss": 0.3791, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.7836848066794763e-06, "epoch": 2.91, "percentage": 58.15, "elapsed_time": "0:38:21", "remaining_time": "0:27:36"} +{"current_steps": 1490, "total_steps": 2545, "loss": 0.3396, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.7234395097085298e-06, "epoch": 2.93, "percentage": 58.55, "elapsed_time": "0:38:33", "remaining_time": "0:27:18"} +{"current_steps": 1500, "total_steps": 2545, "loss": 0.1901, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.663391824493336e-06, "epoch": 2.95, "percentage": 58.94, "elapsed_time": "0:38:47", "remaining_time": "0:27:01"} +{"current_steps": 1510, "total_steps": 2545, "loss": 0.3611, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.6035510464245937e-06, "epoch": 2.97, "percentage": 59.33, "elapsed_time": "0:39:02", "remaining_time": "0:26:45"} +{"current_steps": 1520, "total_steps": 2545, "loss": 0.2753, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.5439264388637407e-06, "epoch": 2.99, "percentage": 59.72, "elapsed_time": "0:39:16", "remaining_time": "0:26:29"} +{"current_steps": 1530, "total_steps": 2545, "loss": 0.2861, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.4845272317089882e-06, "epoch": 3.01, "percentage": 60.12, "elapsed_time": "0:39:29", "remaining_time": "0:26:11"} +{"current_steps": 1540, "total_steps": 2545, "loss": 0.2989, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.4253626199665314e-06, "epoch": 3.03, "percentage": 60.51, "elapsed_time": "0:39:42", "remaining_time": "0:25:54"} +{"current_steps": 1550, "total_steps": 2545, "loss": 0.3478, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.3664417623271594e-06, "epoch": 3.05, "percentage": 60.9, "elapsed_time": "0:39:56", "remaining_time": "0:25:38"} +{"current_steps": 1560, "total_steps": 2545, "loss": 0.2654, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.3077737797484923e-06, "epoch": 3.06, "percentage": 61.3, "elapsed_time": "0:40:10", "remaining_time": "0:25:21"} +{"current_steps": 1570, "total_steps": 2545, "loss": 0.2063, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.249367754043047e-06, "epoch": 3.08, "percentage": 61.69, "elapsed_time": "0:40:25", "remaining_time": "0:25:06"} +{"current_steps": 1580, "total_steps": 2545, "loss": 0.3329, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.1912327264723843e-06, "epoch": 3.1, "percentage": 62.08, "elapsed_time": "0:40:39", "remaining_time": "0:24:49"} +{"current_steps": 1590, "total_steps": 2545, "loss": 0.3934, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.133377696347506e-06, "epoch": 3.12, "percentage": 62.48, "elapsed_time": "0:40:53", "remaining_time": "0:24:33"} +{"current_steps": 1600, "total_steps": 2545, "loss": 0.2528, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.075811619635774e-06, "epoch": 3.14, "percentage": 62.87, "elapsed_time": "0:41:07", "remaining_time": "0:24:17"} +{"current_steps": 1600, "total_steps": 2545, "loss": null, "eval_loss": 0.33900028467178345, "predict_loss": null, "reward": null, "learning_rate": null, "epoch": 3.14, "percentage": 62.87, "elapsed_time": "0:41:07", "remaining_time": "0:24:17"} +{"current_steps": 1610, "total_steps": 2545, "loss": 0.247, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.0185434075745124e-06, "epoch": 3.16, "percentage": 63.26, "elapsed_time": "0:41:59", "remaining_time": "0:24:23"} +{"current_steps": 1620, "total_steps": 2545, "loss": 0.313, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.961581925291557e-06, "epoch": 3.18, "percentage": 63.65, "elapsed_time": "0:42:12", "remaining_time": "0:24:06"} +{"current_steps": 1630, "total_steps": 2545, "loss": 0.3331, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.9049359904329234e-06, "epoch": 3.2, "percentage": 64.05, "elapsed_time": "0:42:27", "remaining_time": "0:23:49"} +{"current_steps": 1640, "total_steps": 2545, "loss": 0.2684, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.8486143717978448e-06, "epoch": 3.22, "percentage": 64.44, "elapsed_time": "0:42:40", "remaining_time": "0:23:32"} +{"current_steps": 1650, "total_steps": 2545, "loss": 0.2743, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.792625787981349e-06, "epoch": 3.24, "percentage": 64.83, "elapsed_time": "0:42:52", "remaining_time": "0:23:15"} +{"current_steps": 1660, "total_steps": 2545, "loss": 0.3512, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.736978906024626e-06, "epoch": 3.26, "percentage": 65.23, "elapsed_time": "0:43:08", "remaining_time": "0:22:59"} +{"current_steps": 1670, "total_steps": 2545, "loss": 0.2919, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.6816823400733628e-06, "epoch": 3.28, "percentage": 65.62, "elapsed_time": "0:43:22", "remaining_time": "0:22:43"} +{"current_steps": 1680, "total_steps": 2545, "loss": 0.322, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.626744650044284e-06, "epoch": 3.3, "percentage": 66.01, "elapsed_time": "0:43:34", "remaining_time": "0:22:26"} +{"current_steps": 1690, "total_steps": 2545, "loss": 0.2349, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.572174340300061e-06, "epoch": 3.32, "percentage": 66.4, "elapsed_time": "0:43:52", "remaining_time": "0:22:11"} +{"current_steps": 1700, "total_steps": 2545, "loss": 0.2368, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.5179798583328415e-06, "epoch": 3.34, "percentage": 66.8, "elapsed_time": "0:44:05", "remaining_time": "0:21:54"} +{"current_steps": 1710, "total_steps": 2545, "loss": 0.3711, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.4641695934565827e-06, "epoch": 3.36, "percentage": 67.19, "elapsed_time": "0:44:19", "remaining_time": "0:21:38"} +{"current_steps": 1720, "total_steps": 2545, "loss": 0.2323, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.410751875508373e-06, "epoch": 3.38, "percentage": 67.58, "elapsed_time": "0:44:34", "remaining_time": "0:21:22"} +{"current_steps": 1730, "total_steps": 2545, "loss": 0.3185, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.3577349735589787e-06, "epoch": 3.4, "percentage": 67.98, "elapsed_time": "0:44:49", "remaining_time": "0:21:07"} +{"current_steps": 1740, "total_steps": 2545, "loss": 0.2479, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.3051270946327887e-06, "epoch": 3.42, "percentage": 68.37, "elapsed_time": "0:45:05", "remaining_time": "0:20:51"} +{"current_steps": 1750, "total_steps": 2545, "loss": 0.3738, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.2529363824373707e-06, "epoch": 3.44, "percentage": 68.76, "elapsed_time": "0:45:18", "remaining_time": "0:20:35"} +{"current_steps": 1760, "total_steps": 2545, "loss": 0.3481, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.2011709161028156e-06, "epoch": 3.46, "percentage": 69.16, "elapsed_time": "0:45:30", "remaining_time": "0:20:17"} +{"current_steps": 1770, "total_steps": 2545, "loss": 0.2863, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.149838708931087e-06, "epoch": 3.48, "percentage": 69.55, "elapsed_time": "0:45:43", "remaining_time": "0:20:01"} +{"current_steps": 1780, "total_steps": 2545, "loss": 0.2415, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.0989477071555757e-06, "epoch": 3.5, "percentage": 69.94, "elapsed_time": "0:45:58", "remaining_time": "0:19:45"} +{"current_steps": 1790, "total_steps": 2545, "loss": 0.2989, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.0485057887110026e-06, "epoch": 3.52, "percentage": 70.33, "elapsed_time": "0:46:10", "remaining_time": "0:19:28"} +{"current_steps": 1800, "total_steps": 2545, "loss": 0.3192, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.998520762013923e-06, "epoch": 3.54, "percentage": 70.73, "elapsed_time": "0:46:24", "remaining_time": "0:19:12"} +{"current_steps": 1800, "total_steps": 2545, "loss": null, "eval_loss": 0.33926254510879517, "predict_loss": null, "reward": null, "learning_rate": null, "epoch": 3.54, "percentage": 70.73, "elapsed_time": "0:46:24", "remaining_time": "0:19:12"} +{"current_steps": 1810, "total_steps": 2545, "loss": 0.2484, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.949000364753985e-06, "epoch": 3.56, "percentage": 71.12, "elapsed_time": "0:47:16", "remaining_time": "0:19:11"} +{"current_steps": 1820, "total_steps": 2545, "loss": 0.2761, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.8999522626961254e-06, "epoch": 3.58, "percentage": 71.51, "elapsed_time": "0:47:31", "remaining_time": "0:18:55"} +{"current_steps": 1830, "total_steps": 2545, "loss": 0.2972, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.851384048493914e-06, "epoch": 3.6, "percentage": 71.91, "elapsed_time": "0:47:43", "remaining_time": "0:18:38"} +{"current_steps": 1840, "total_steps": 2545, "loss": 0.2279, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.8033032405142075e-06, "epoch": 3.61, "percentage": 72.3, "elapsed_time": "0:47:55", "remaining_time": "0:18:21"} +{"current_steps": 1850, "total_steps": 2545, "loss": 0.2662, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.7557172816733104e-06, "epoch": 3.63, "percentage": 72.69, "elapsed_time": "0:48:08", "remaining_time": "0:18:05"} +{"current_steps": 1860, "total_steps": 2545, "loss": 0.3054, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.7086335382848012e-06, "epoch": 3.65, "percentage": 73.08, "elapsed_time": "0:48:21", "remaining_time": "0:17:48"} +{"current_steps": 1870, "total_steps": 2545, "loss": 0.3051, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.6620592989192318e-06, "epoch": 3.67, "percentage": 73.48, "elapsed_time": "0:48:34", "remaining_time": "0:17:31"} +{"current_steps": 1880, "total_steps": 2545, "loss": 0.2765, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.6160017732758588e-06, "epoch": 3.69, "percentage": 73.87, "elapsed_time": "0:48:49", "remaining_time": "0:17:16"} +{"current_steps": 1890, "total_steps": 2545, "loss": 0.2839, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.5704680910665721e-06, "epoch": 3.71, "percentage": 74.26, "elapsed_time": "0:49:04", "remaining_time": "0:17:00"} +{"current_steps": 1900, "total_steps": 2545, "loss": 0.2527, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.5254653009122206e-06, "epoch": 3.73, "percentage": 74.66, "elapsed_time": "0:49:18", "remaining_time": "0:16:44"} +{"current_steps": 1910, "total_steps": 2545, "loss": 0.2268, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.4810003692514829e-06, "epoch": 3.75, "percentage": 75.05, "elapsed_time": "0:49:31", "remaining_time": "0:16:27"} +{"current_steps": 1920, "total_steps": 2545, "loss": 0.3011, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.4370801792624656e-06, "epoch": 3.77, "percentage": 75.44, "elapsed_time": "0:49:44", "remaining_time": "0:16:11"} +{"current_steps": 1930, "total_steps": 2545, "loss": 0.2938, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.3937115297971788e-06, "epoch": 3.79, "percentage": 75.83, "elapsed_time": "0:49:57", "remaining_time": "0:15:55"} +{"current_steps": 1940, "total_steps": 2545, "loss": 0.2375, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.3509011343290767e-06, "epoch": 3.81, "percentage": 76.23, "elapsed_time": "0:50:12", "remaining_time": "0:15:39"} +{"current_steps": 1950, "total_steps": 2545, "loss": 0.2748, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.3086556199138117e-06, "epoch": 3.83, "percentage": 76.62, "elapsed_time": "0:50:27", "remaining_time": "0:15:23"} +{"current_steps": 1960, "total_steps": 2545, "loss": 0.2703, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.2669815261633666e-06, "epoch": 3.85, "percentage": 77.01, "elapsed_time": "0:50:42", "remaining_time": "0:15:07"} +{"current_steps": 1970, "total_steps": 2545, "loss": 0.2331, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.225885304233716e-06, "epoch": 3.87, "percentage": 77.41, "elapsed_time": "0:50:55", "remaining_time": "0:14:51"} +{"current_steps": 1980, "total_steps": 2545, "loss": 0.3299, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.1853733158261894e-06, "epoch": 3.89, "percentage": 77.8, "elapsed_time": "0:51:09", "remaining_time": "0:14:35"} +{"current_steps": 1990, "total_steps": 2545, "loss": 0.2159, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.1454518322026836e-06, "epoch": 3.91, "percentage": 78.19, "elapsed_time": "0:51:21", "remaining_time": "0:14:19"} +{"current_steps": 2000, "total_steps": 2545, "loss": 0.2834, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.1061270332148626e-06, "epoch": 3.93, "percentage": 78.59, "elapsed_time": "0:51:35", "remaining_time": "0:14:03"} +{"current_steps": 2000, "total_steps": 2545, "loss": null, "eval_loss": 0.3370112180709839, "predict_loss": null, "reward": null, "learning_rate": null, "epoch": 3.93, "percentage": 78.59, "elapsed_time": "0:51:35", "remaining_time": "0:14:03"} +{"current_steps": 2010, "total_steps": 2545, "loss": 0.2315, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.06740500634752e-06, "epoch": 3.95, "percentage": 78.98, "elapsed_time": "0:52:29", "remaining_time": "0:13:58"} +{"current_steps": 2020, "total_steps": 2545, "loss": 0.32, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.0292917457762325e-06, "epoch": 3.97, "percentage": 79.37, "elapsed_time": "0:52:41", "remaining_time": "0:13:41"} +{"current_steps": 2030, "total_steps": 2545, "loss": 0.3086, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.917931514394674e-07, "epoch": 3.99, "percentage": 79.76, "elapsed_time": "0:52:55", "remaining_time": "0:13:25"} +{"current_steps": 2040, "total_steps": 2545, "loss": 0.327, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.549150281252633e-07, "epoch": 4.01, "percentage": 80.16, "elapsed_time": "0:53:11", "remaining_time": "0:13:09"} +{"current_steps": 2050, "total_steps": 2545, "loss": 0.2789, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.186630845726497e-07, "epoch": 4.03, "percentage": 80.55, "elapsed_time": "0:53:24", "remaining_time": "0:12:53"} +{"current_steps": 2060, "total_steps": 2545, "loss": 0.305, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 8.830429325879397e-07, "epoch": 4.05, "percentage": 80.94, "elapsed_time": "0:53:39", "remaining_time": "0:12:37"} +{"current_steps": 2070, "total_steps": 2545, "loss": 0.2732, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 8.480600861760124e-07, "epoch": 4.07, "percentage": 81.34, "elapsed_time": "0:53:51", "remaining_time": "0:12:21"} +{"current_steps": 2080, "total_steps": 2545, "loss": 0.2897, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 8.137199606867507e-07, "epoch": 4.09, "percentage": 81.73, "elapsed_time": "0:54:04", "remaining_time": "0:12:05"} +{"current_steps": 2090, "total_steps": 2545, "loss": 0.3181, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 7.800278719767407e-07, "epoch": 4.11, "percentage": 82.12, "elapsed_time": "0:54:16", "remaining_time": "0:11:48"} +{"current_steps": 2100, "total_steps": 2545, "loss": 0.2144, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 7.469890355863785e-07, "epoch": 4.13, "percentage": 82.51, "elapsed_time": "0:54:28", "remaining_time": "0:11:32"} +{"current_steps": 2110, "total_steps": 2545, "loss": 0.294, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 7.146085659325025e-07, "epoch": 4.15, "percentage": 82.91, "elapsed_time": "0:54:44", "remaining_time": "0:11:17"} +{"current_steps": 2120, "total_steps": 2545, "loss": 0.2729, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 6.828914755166826e-07, "epoch": 4.17, "percentage": 83.3, "elapsed_time": "0:54:58", "remaining_time": "0:11:01"} +{"current_steps": 2130, "total_steps": 2545, "loss": 0.2509, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 6.518426741492823e-07, "epoch": 4.18, "percentage": 83.69, "elapsed_time": "0:55:13", "remaining_time": "0:10:45"} +{"current_steps": 2140, "total_steps": 2545, "loss": 0.2271, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 6.214669681894181e-07, "epoch": 4.2, "percentage": 84.09, "elapsed_time": "0:55:27", "remaining_time": "0:10:29"} +{"current_steps": 2150, "total_steps": 2545, "loss": 0.2703, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 5.917690598009385e-07, "epoch": 4.22, "percentage": 84.48, "elapsed_time": "0:55:40", "remaining_time": "0:10:13"} +{"current_steps": 2160, "total_steps": 2545, "loss": 0.2339, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 5.62753546224526e-07, "epoch": 4.24, "percentage": 84.87, "elapsed_time": "0:55:54", "remaining_time": "0:09:57"} +{"current_steps": 2170, "total_steps": 2545, "loss": 0.3029, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 5.344249190660427e-07, "epoch": 4.26, "percentage": 85.27, "elapsed_time": "0:56:09", "remaining_time": "0:09:42"} +{"current_steps": 2180, "total_steps": 2545, "loss": 0.3205, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 5.067875636012304e-07, "epoch": 4.28, "percentage": 85.66, "elapsed_time": "0:56:24", "remaining_time": "0:09:26"} +{"current_steps": 2190, "total_steps": 2545, "loss": 0.2875, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.798457580968674e-07, "epoch": 4.3, "percentage": 86.05, "elapsed_time": "0:56:37", "remaining_time": "0:09:10"} +{"current_steps": 2200, "total_steps": 2545, "loss": 0.3612, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.536036731484972e-07, "epoch": 4.32, "percentage": 86.44, "elapsed_time": "0:56:55", "remaining_time": "0:08:55"} +{"current_steps": 2200, "total_steps": 2545, "loss": null, "eval_loss": 0.337531715631485, "predict_loss": null, "reward": null, "learning_rate": null, "epoch": 4.32, "percentage": 86.44, "elapsed_time": "0:56:55", "remaining_time": "0:08:55"} +{"current_steps": 2210, "total_steps": 2545, "loss": 0.2825, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.280653710348126e-07, "epoch": 4.34, "percentage": 86.84, "elapsed_time": "0:57:46", "remaining_time": "0:08:45"} +{"current_steps": 2220, "total_steps": 2545, "loss": 0.323, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.03234805088818e-07, "epoch": 4.36, "percentage": 87.23, "elapsed_time": "0:58:01", "remaining_time": "0:08:29"} +{"current_steps": 2230, "total_steps": 2545, "loss": 0.2784, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.7911581908585626e-07, "epoch": 4.38, "percentage": 87.62, "elapsed_time": "0:58:12", "remaining_time": "0:08:13"} +{"current_steps": 2240, "total_steps": 2545, "loss": 0.2632, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.5571214664858356e-07, "epoch": 4.4, "percentage": 88.02, "elapsed_time": "0:58:25", "remaining_time": "0:07:57"} +{"current_steps": 2250, "total_steps": 2545, "loss": 0.2192, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.330274106690112e-07, "epoch": 4.42, "percentage": 88.41, "elapsed_time": "0:58:36", "remaining_time": "0:07:41"} +{"current_steps": 2260, "total_steps": 2545, "loss": 0.2647, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.110651227476774e-07, "epoch": 4.44, "percentage": 88.8, "elapsed_time": "0:58:50", "remaining_time": "0:07:25"} +{"current_steps": 2270, "total_steps": 2545, "loss": 0.2897, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.8982868265005457e-07, "epoch": 4.46, "percentage": 89.19, "elapsed_time": "0:59:04", "remaining_time": "0:07:09"} +{"current_steps": 2280, "total_steps": 2545, "loss": 0.3023, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.693213777802606e-07, "epoch": 4.48, "percentage": 89.59, "elapsed_time": "0:59:18", "remaining_time": "0:06:53"} +{"current_steps": 2290, "total_steps": 2545, "loss": 0.2419, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.495463826721711e-07, "epoch": 4.5, "percentage": 89.98, "elapsed_time": "0:59:33", "remaining_time": "0:06:37"} +{"current_steps": 2300, "total_steps": 2545, "loss": 0.3104, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.305067584980014e-07, "epoch": 4.52, "percentage": 90.37, "elapsed_time": "0:59:48", "remaining_time": "0:06:22"} +{"current_steps": 2310, "total_steps": 2545, "loss": 0.328, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.1220545259443558e-07, "epoch": 4.54, "percentage": 90.77, "elapsed_time": "1:00:01", "remaining_time": "0:06:06"} +{"current_steps": 2320, "total_steps": 2545, "loss": 0.307, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.9464529800637731e-07, "epoch": 4.56, "percentage": 91.16, "elapsed_time": "1:00:15", "remaining_time": "0:05:50"} +{"current_steps": 2330, "total_steps": 2545, "loss": 0.2437, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.7782901304839617e-07, "epoch": 4.58, "percentage": 91.55, "elapsed_time": "1:00:26", "remaining_time": "0:05:34"} +{"current_steps": 2340, "total_steps": 2545, "loss": 0.2862, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.617592008839308e-07, "epoch": 4.6, "percentage": 91.94, "elapsed_time": "1:00:38", "remaining_time": "0:05:18"} +{"current_steps": 2350, "total_steps": 2545, "loss": 0.2901, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.4643834912232035e-07, "epoch": 4.62, "percentage": 92.34, "elapsed_time": "1:00:51", "remaining_time": "0:05:02"} +{"current_steps": 2360, "total_steps": 2545, "loss": 0.2482, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.3186882943371892e-07, "epoch": 4.64, "percentage": 92.73, "elapsed_time": "1:01:05", "remaining_time": "0:04:47"} +{"current_steps": 2370, "total_steps": 2545, "loss": 0.2957, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.1805289718196499e-07, "epoch": 4.66, "percentage": 93.12, "elapsed_time": "1:01:17", "remaining_time": "0:04:31"} +{"current_steps": 2380, "total_steps": 2545, "loss": 0.3179, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.0499269107544674e-07, "epoch": 4.68, "percentage": 93.52, "elapsed_time": "1:01:30", "remaining_time": "0:04:15"} +{"current_steps": 2390, "total_steps": 2545, "loss": 0.277, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.269023283603295e-08, "epoch": 4.7, "percentage": 93.91, "elapsed_time": "1:01:43", "remaining_time": "0:04:00"} +{"current_steps": 2400, "total_steps": 2545, "loss": 0.2732, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 8.114742688610789e-08, "epoch": 4.72, "percentage": 94.3, "elapsed_time": "1:01:57", "remaining_time": "0:03:44"} +{"current_steps": 2400, "total_steps": 2545, "loss": null, "eval_loss": 0.33687904477119446, "predict_loss": null, "reward": null, "learning_rate": null, "epoch": 4.72, "percentage": 94.3, "elapsed_time": "1:01:57", "remaining_time": "0:03:44"} +{"current_steps": 2410, "total_steps": 2545, "loss": 0.2598, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 7.036606005376701e-08, "epoch": 4.73, "percentage": 94.7, "elapsed_time": "1:02:51", "remaining_time": "0:03:31"} +{"current_steps": 2420, "total_steps": 2545, "loss": 0.2599, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 6.034780129621664e-08, "epoch": 4.75, "percentage": 95.09, "elapsed_time": "1:03:05", "remaining_time": "0:03:15"} +{"current_steps": 2430, "total_steps": 2545, "loss": 0.2699, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 5.1094201441417855e-08, "epoch": 4.77, "percentage": 95.48, "elapsed_time": "1:03:20", "remaining_time": "0:02:59"} +{"current_steps": 2440, "total_steps": 2545, "loss": 0.2819, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.260669294801956e-08, "epoch": 4.79, "percentage": 95.87, "elapsed_time": "1:03:34", "remaining_time": "0:02:44"} +{"current_steps": 2450, "total_steps": 2545, "loss": 0.2666, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.488658968361147e-08, "epoch": 4.81, "percentage": 96.27, "elapsed_time": "1:03:50", "remaining_time": "0:02:28"} +{"current_steps": 2460, "total_steps": 2545, "loss": 0.2418, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.79350867213396e-08, "epoch": 4.83, "percentage": 96.66, "elapsed_time": "1:04:05", "remaining_time": "0:02:12"} +{"current_steps": 2470, "total_steps": 2545, "loss": 0.1971, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.1753260154906973e-08, "epoch": 4.85, "percentage": 97.05, "elapsed_time": "1:04:19", "remaining_time": "0:01:57"} +{"current_steps": 2480, "total_steps": 2545, "loss": 0.3358, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.6342066931995804e-08, "epoch": 4.87, "percentage": 97.45, "elapsed_time": "1:04:32", "remaining_time": "0:01:41"} +{"current_steps": 2490, "total_steps": 2545, "loss": 0.2409, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.1702344706129298e-08, "epoch": 4.89, "percentage": 97.84, "elapsed_time": "1:04:48", "remaining_time": "0:01:25"} +{"current_steps": 2500, "total_steps": 2545, "loss": 0.2549, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 7.834811707005952e-09, "epoch": 4.91, "percentage": 98.23, "elapsed_time": "1:05:01", "remaining_time": "0:01:10"} +{"current_steps": 2510, "total_steps": 2545, "loss": 0.2957, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.740066629315143e-09, "epoch": 4.93, "percentage": 98.62, "elapsed_time": "1:05:14", "remaining_time": "0:00:54"} +{"current_steps": 2520, "total_steps": 2545, "loss": 0.265, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.4185885400596076e-09, "epoch": 4.95, "percentage": 99.02, "elapsed_time": "1:05:28", "remaining_time": "0:00:38"} +{"current_steps": 2530, "total_steps": 2545, "loss": 0.2817, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 8.707368043975318e-10, "epoch": 4.97, "percentage": 99.41, "elapsed_time": "1:05:40", "remaining_time": "0:00:23"} +{"current_steps": 2540, "total_steps": 2545, "loss": 0.2591, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.675103000872377e-11, "epoch": 4.99, "percentage": 99.8, "elapsed_time": "1:05:55", "remaining_time": "0:00:07"} +{"current_steps": 2545, "total_steps": 2545, "loss": null, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": null, "epoch": 5.0, "percentage": 100.0, "elapsed_time": "1:06:01", "remaining_time": "0:00:00"} +{"current_steps": 90, "total_steps": 90, "loss": null, "eval_loss": 0.3370112180709839, "predict_loss": null, "reward": null, "learning_rate": null, "epoch": 5.0, "percentage": 100.0, "elapsed_time": "1:06:48", "remaining_time": "0:00:00"} diff --git a/llama2_13b_peft/unit_conversion/trainer_state.json b/llama2_13b_peft/unit_conversion/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..b573d8c23554d23ac12bc48d76d0ff028e051fb2 --- /dev/null +++ b/llama2_13b_peft/unit_conversion/trainer_state.json @@ -0,0 +1,1904 @@ +{ + "best_metric": 0.3370112180709839, + "best_model_checkpoint": "ckpt/llama2_13b_fuze30_no_sys/unit_conversion_no_sys/checkpoint-2000", + "epoch": 5.0, + "eval_steps": 200, + "global_step": 2545, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.02, + "grad_norm": 1.0320725440979004, + "learning_rate": 5e-06, + "loss": 1.6608, + "step": 10 + }, + { + "epoch": 0.04, + "grad_norm": 1.4061148166656494, + "learning_rate": 1e-05, + "loss": 1.7201, + "step": 20 + }, + { + "epoch": 0.06, + "grad_norm": 1.7033771276474, + "learning_rate": 9.99961299962427e-06, + "loss": 1.6975, + "step": 30 + }, + { + "epoch": 0.08, + "grad_norm": 1.8334242105484009, + "learning_rate": 9.998452058404793e-06, + "loss": 1.3794, + "step": 40 + }, + { + "epoch": 0.1, + "grad_norm": 1.2908776998519897, + "learning_rate": 9.996517356055444e-06, + "loss": 1.0864, + "step": 50 + }, + { + "epoch": 0.12, + "grad_norm": 0.6934323906898499, + "learning_rate": 9.99380919206844e-06, + "loss": 0.6983, + "step": 60 + }, + { + "epoch": 0.14, + "grad_norm": 0.5397089719772339, + "learning_rate": 9.990327985667972e-06, + "loss": 0.564, + "step": 70 + }, + { + "epoch": 0.16, + "grad_norm": 0.7038566470146179, + "learning_rate": 9.986074275745314e-06, + "loss": 0.5864, + "step": 80 + }, + { + "epoch": 0.18, + "grad_norm": 0.6588059067726135, + "learning_rate": 9.981048720775401e-06, + "loss": 0.4707, + "step": 90 + }, + { + "epoch": 0.2, + "grad_norm": 0.6290227174758911, + "learning_rate": 9.975252098714898e-06, + "loss": 0.5636, + "step": 100 + }, + { + "epoch": 0.22, + "grad_norm": 0.5712002515792847, + "learning_rate": 9.968685306881772e-06, + "loss": 0.4544, + "step": 110 + }, + { + "epoch": 0.24, + "grad_norm": 0.47688916325569153, + "learning_rate": 9.961349361816384e-06, + "loss": 0.472, + "step": 120 + }, + { + "epoch": 0.26, + "grad_norm": 0.5468528866767883, + "learning_rate": 9.953245399124133e-06, + "loss": 0.4864, + "step": 130 + }, + { + "epoch": 0.28, + "grad_norm": 0.5000003576278687, + "learning_rate": 9.94437467329966e-06, + "loss": 0.4197, + "step": 140 + }, + { + "epoch": 0.29, + "grad_norm": 0.5887648463249207, + "learning_rate": 9.934738557532663e-06, + "loss": 0.4712, + "step": 150 + }, + { + "epoch": 0.31, + "grad_norm": 0.40465375781059265, + "learning_rate": 9.924338543495302e-06, + "loss": 0.3896, + "step": 160 + }, + { + "epoch": 0.33, + "grad_norm": 0.5504217743873596, + "learning_rate": 9.91317624111132e-06, + "loss": 0.5073, + "step": 170 + }, + { + "epoch": 0.35, + "grad_norm": 0.6431964635848999, + "learning_rate": 9.9012533783068e-06, + "loss": 0.4667, + "step": 180 + }, + { + "epoch": 0.37, + "grad_norm": 0.4865589737892151, + "learning_rate": 9.888571800742694e-06, + "loss": 0.4281, + "step": 190 + }, + { + "epoch": 0.39, + "grad_norm": 0.6109912395477295, + "learning_rate": 9.87513347152912e-06, + "loss": 0.5422, + "step": 200 + }, + { + "epoch": 0.39, + "eval_loss": 0.47923311591148376, + "eval_runtime": 39.2164, + "eval_samples_per_second": 73.26, + "eval_steps_per_second": 2.295, + "step": 200 + }, + { + "epoch": 0.41, + "grad_norm": 0.6530150175094604, + "learning_rate": 9.860940470921457e-06, + "loss": 0.4597, + "step": 210 + }, + { + "epoch": 0.43, + "grad_norm": 0.8157398700714111, + "learning_rate": 9.845994995998332e-06, + "loss": 0.4876, + "step": 220 + }, + { + "epoch": 0.45, + "grad_norm": 0.6635129451751709, + "learning_rate": 9.830299360321509e-06, + "loss": 0.5003, + "step": 230 + }, + { + "epoch": 0.47, + "grad_norm": 0.5162450671195984, + "learning_rate": 9.813855993577753e-06, + "loss": 0.3836, + "step": 240 + }, + { + "epoch": 0.49, + "grad_norm": 0.6863852143287659, + "learning_rate": 9.796667441202705e-06, + "loss": 0.4631, + "step": 250 + }, + { + "epoch": 0.51, + "grad_norm": 0.5410252809524536, + "learning_rate": 9.778736363986854e-06, + "loss": 0.4101, + "step": 260 + }, + { + "epoch": 0.53, + "grad_norm": 0.6589216589927673, + "learning_rate": 9.76006553766365e-06, + "loss": 0.4622, + "step": 270 + }, + { + "epoch": 0.55, + "grad_norm": 0.8363272547721863, + "learning_rate": 9.740657852479815e-06, + "loss": 0.4378, + "step": 280 + }, + { + "epoch": 0.57, + "grad_norm": 0.6521047949790955, + "learning_rate": 9.72051631274793e-06, + "loss": 0.3582, + "step": 290 + }, + { + "epoch": 0.59, + "grad_norm": 0.6268656253814697, + "learning_rate": 9.699644036381374e-06, + "loss": 0.45, + "step": 300 + }, + { + "epoch": 0.61, + "grad_norm": 0.8181853294372559, + "learning_rate": 9.678044254411665e-06, + "loss": 0.3289, + "step": 310 + }, + { + "epoch": 0.63, + "grad_norm": 0.7349339723587036, + "learning_rate": 9.655720310488298e-06, + "loss": 0.3618, + "step": 320 + }, + { + "epoch": 0.65, + "grad_norm": 0.8285214304924011, + "learning_rate": 9.632675660361148e-06, + "loss": 0.3579, + "step": 330 + }, + { + "epoch": 0.67, + "grad_norm": 1.1723264455795288, + "learning_rate": 9.60891387134552e-06, + "loss": 0.4039, + "step": 340 + }, + { + "epoch": 0.69, + "grad_norm": 0.8121698498725891, + "learning_rate": 9.58443862176992e-06, + "loss": 0.4668, + "step": 350 + }, + { + "epoch": 0.71, + "grad_norm": 0.6637731194496155, + "learning_rate": 9.559253700406663e-06, + "loss": 0.4174, + "step": 360 + }, + { + "epoch": 0.73, + "grad_norm": 1.7488420009613037, + "learning_rate": 9.533363005885362e-06, + "loss": 0.4081, + "step": 370 + }, + { + "epoch": 0.75, + "grad_norm": 0.805870771408081, + "learning_rate": 9.506770546089422e-06, + "loss": 0.3972, + "step": 380 + }, + { + "epoch": 0.77, + "grad_norm": 0.5776576399803162, + "learning_rate": 9.479480437535613e-06, + "loss": 0.3379, + "step": 390 + }, + { + "epoch": 0.79, + "grad_norm": 0.8170546889305115, + "learning_rate": 9.45149690473684e-06, + "loss": 0.319, + "step": 400 + }, + { + "epoch": 0.79, + "eval_loss": 0.41680005192756653, + "eval_runtime": 39.2491, + "eval_samples_per_second": 73.199, + "eval_steps_per_second": 2.293, + "step": 400 + }, + { + "epoch": 0.81, + "grad_norm": 0.8885582089424133, + "learning_rate": 9.422824279548189e-06, + "loss": 0.454, + "step": 410 + }, + { + "epoch": 0.83, + "grad_norm": 0.9332516193389893, + "learning_rate": 9.393467000496345e-06, + "loss": 0.3599, + "step": 420 + }, + { + "epoch": 0.84, + "grad_norm": 0.7631992697715759, + "learning_rate": 9.36342961209252e-06, + "loss": 0.3763, + "step": 430 + }, + { + "epoch": 0.86, + "grad_norm": 0.7638279795646667, + "learning_rate": 9.332716764128952e-06, + "loss": 0.3931, + "step": 440 + }, + { + "epoch": 0.88, + "grad_norm": 0.6363512873649597, + "learning_rate": 9.301333210959123e-06, + "loss": 0.3708, + "step": 450 + }, + { + "epoch": 0.9, + "grad_norm": 0.7823840379714966, + "learning_rate": 9.26928381076178e-06, + "loss": 0.3863, + "step": 460 + }, + { + "epoch": 0.92, + "grad_norm": 0.8903080821037292, + "learning_rate": 9.236573524788888e-06, + "loss": 0.3334, + "step": 470 + }, + { + "epoch": 0.94, + "grad_norm": 0.8487058281898499, + "learning_rate": 9.203207416597635e-06, + "loss": 0.4054, + "step": 480 + }, + { + "epoch": 0.96, + "grad_norm": 0.7155742049217224, + "learning_rate": 9.169190651266582e-06, + "loss": 0.3992, + "step": 490 + }, + { + "epoch": 0.98, + "grad_norm": 1.163004755973816, + "learning_rate": 9.134528494596116e-06, + "loss": 0.3113, + "step": 500 + }, + { + "epoch": 1.0, + "grad_norm": 0.9353289008140564, + "learning_rate": 9.099226312293296e-06, + "loss": 0.4078, + "step": 510 + }, + { + "epoch": 1.02, + "grad_norm": 0.8487387895584106, + "learning_rate": 9.063289569141251e-06, + "loss": 0.3958, + "step": 520 + }, + { + "epoch": 1.04, + "grad_norm": 0.8114432096481323, + "learning_rate": 9.026723828153224e-06, + "loss": 0.3513, + "step": 530 + }, + { + "epoch": 1.06, + "grad_norm": 0.7491832971572876, + "learning_rate": 8.98953474971141e-06, + "loss": 0.3857, + "step": 540 + }, + { + "epoch": 1.08, + "grad_norm": 0.7338570952415466, + "learning_rate": 8.951728090690743e-06, + "loss": 0.3646, + "step": 550 + }, + { + "epoch": 1.1, + "grad_norm": 0.8805945515632629, + "learning_rate": 8.913309703567722e-06, + "loss": 0.3052, + "step": 560 + }, + { + "epoch": 1.12, + "grad_norm": 1.1915472745895386, + "learning_rate": 8.87428553551445e-06, + "loss": 0.3649, + "step": 570 + }, + { + "epoch": 1.14, + "grad_norm": 1.0756891965866089, + "learning_rate": 8.834661627478003e-06, + "loss": 0.3371, + "step": 580 + }, + { + "epoch": 1.16, + "grad_norm": 0.8875855803489685, + "learning_rate": 8.794444113245302e-06, + "loss": 0.346, + "step": 590 + }, + { + "epoch": 1.18, + "grad_norm": 0.5038532018661499, + "learning_rate": 8.753639218493594e-06, + "loss": 0.3583, + "step": 600 + }, + { + "epoch": 1.18, + "eval_loss": 0.38733917474746704, + "eval_runtime": 39.2459, + "eval_samples_per_second": 73.205, + "eval_steps_per_second": 2.293, + "step": 600 + }, + { + "epoch": 1.2, + "grad_norm": 0.8062039613723755, + "learning_rate": 8.712253259826719e-06, + "loss": 0.3845, + "step": 610 + }, + { + "epoch": 1.22, + "grad_norm": 1.308142066001892, + "learning_rate": 8.670292643797302e-06, + "loss": 0.431, + "step": 620 + }, + { + "epoch": 1.24, + "grad_norm": 0.8702178597450256, + "learning_rate": 8.627763865915005e-06, + "loss": 0.3512, + "step": 630 + }, + { + "epoch": 1.26, + "grad_norm": 0.7164438962936401, + "learning_rate": 8.58467350964104e-06, + "loss": 0.3475, + "step": 640 + }, + { + "epoch": 1.28, + "grad_norm": 1.3485796451568604, + "learning_rate": 8.541028245369033e-06, + "loss": 0.3372, + "step": 650 + }, + { + "epoch": 1.3, + "grad_norm": 0.9907482266426086, + "learning_rate": 8.496834829392454e-06, + "loss": 0.3133, + "step": 660 + }, + { + "epoch": 1.32, + "grad_norm": 0.744209885597229, + "learning_rate": 8.452100102858734e-06, + "loss": 0.3274, + "step": 670 + }, + { + "epoch": 1.34, + "grad_norm": 1.4934322834014893, + "learning_rate": 8.40683099071027e-06, + "loss": 0.3771, + "step": 680 + }, + { + "epoch": 1.36, + "grad_norm": 0.9607213735580444, + "learning_rate": 8.361034500612421e-06, + "loss": 0.3636, + "step": 690 + }, + { + "epoch": 1.38, + "grad_norm": 0.31818854808807373, + "learning_rate": 8.31471772186874e-06, + "loss": 0.3026, + "step": 700 + }, + { + "epoch": 1.39, + "grad_norm": 1.0681700706481934, + "learning_rate": 8.267887824323537e-06, + "loss": 0.3075, + "step": 710 + }, + { + "epoch": 1.41, + "grad_norm": 0.7339015603065491, + "learning_rate": 8.22055205725199e-06, + "loss": 0.2668, + "step": 720 + }, + { + "epoch": 1.43, + "grad_norm": 0.9504737854003906, + "learning_rate": 8.172717748237955e-06, + "loss": 0.2809, + "step": 730 + }, + { + "epoch": 1.45, + "grad_norm": 1.125591516494751, + "learning_rate": 8.124392302039658e-06, + "loss": 0.2968, + "step": 740 + }, + { + "epoch": 1.47, + "grad_norm": 0.9738477468490601, + "learning_rate": 8.075583199443431e-06, + "loss": 0.3148, + "step": 750 + }, + { + "epoch": 1.49, + "grad_norm": 1.076797604560852, + "learning_rate": 8.026297996105694e-06, + "loss": 0.3266, + "step": 760 + }, + { + "epoch": 1.51, + "grad_norm": 0.5665436387062073, + "learning_rate": 7.97654432138333e-06, + "loss": 0.3143, + "step": 770 + }, + { + "epoch": 1.53, + "grad_norm": 0.8260796070098877, + "learning_rate": 7.926329877152665e-06, + "loss": 0.3853, + "step": 780 + }, + { + "epoch": 1.55, + "grad_norm": 0.9493719339370728, + "learning_rate": 7.875662436617211e-06, + "loss": 0.3196, + "step": 790 + }, + { + "epoch": 1.57, + "grad_norm": 1.1995518207550049, + "learning_rate": 7.824549843104378e-06, + "loss": 0.3048, + "step": 800 + }, + { + "epoch": 1.57, + "eval_loss": 0.369180291891098, + "eval_runtime": 39.26, + "eval_samples_per_second": 73.179, + "eval_steps_per_second": 2.292, + "step": 800 + }, + { + "epoch": 1.59, + "grad_norm": 0.8687970042228699, + "learning_rate": 7.773000008851323e-06, + "loss": 0.3529, + "step": 810 + }, + { + "epoch": 1.61, + "grad_norm": 1.1251832246780396, + "learning_rate": 7.721020913780137e-06, + "loss": 0.4047, + "step": 820 + }, + { + "epoch": 1.63, + "grad_norm": 1.1053804159164429, + "learning_rate": 7.668620604262548e-06, + "loss": 0.3268, + "step": 830 + }, + { + "epoch": 1.65, + "grad_norm": 0.8911699652671814, + "learning_rate": 7.615807191874349e-06, + "loss": 0.3198, + "step": 840 + }, + { + "epoch": 1.67, + "grad_norm": 1.4273747205734253, + "learning_rate": 7.56258885213971e-06, + "loss": 0.3856, + "step": 850 + }, + { + "epoch": 1.69, + "grad_norm": 0.9190964102745056, + "learning_rate": 7.50897382326562e-06, + "loss": 0.2915, + "step": 860 + }, + { + "epoch": 1.71, + "grad_norm": 0.6233330965042114, + "learning_rate": 7.454970404866612e-06, + "loss": 0.2955, + "step": 870 + }, + { + "epoch": 1.73, + "grad_norm": 1.0920981168746948, + "learning_rate": 7.400586956679965e-06, + "loss": 0.3256, + "step": 880 + }, + { + "epoch": 1.75, + "grad_norm": 1.1413133144378662, + "learning_rate": 7.345831897271636e-06, + "loss": 0.3329, + "step": 890 + }, + { + "epoch": 1.77, + "grad_norm": 0.7899062037467957, + "learning_rate": 7.2907137027330455e-06, + "loss": 0.4319, + "step": 900 + }, + { + "epoch": 1.79, + "grad_norm": 1.382123351097107, + "learning_rate": 7.235240905368997e-06, + "loss": 0.3695, + "step": 910 + }, + { + "epoch": 1.81, + "grad_norm": 1.0356751680374146, + "learning_rate": 7.179422092376856e-06, + "loss": 0.2896, + "step": 920 + }, + { + "epoch": 1.83, + "grad_norm": 1.0618700981140137, + "learning_rate": 7.123265904517264e-06, + "loss": 0.3428, + "step": 930 + }, + { + "epoch": 1.85, + "grad_norm": 1.2953617572784424, + "learning_rate": 7.066781034776546e-06, + "loss": 0.3375, + "step": 940 + }, + { + "epoch": 1.87, + "grad_norm": 0.5693756937980652, + "learning_rate": 7.009976227021018e-06, + "loss": 0.3072, + "step": 950 + }, + { + "epoch": 1.89, + "grad_norm": 1.263010859489441, + "learning_rate": 6.952860274643466e-06, + "loss": 0.3428, + "step": 960 + }, + { + "epoch": 1.91, + "grad_norm": 1.0063074827194214, + "learning_rate": 6.895442019201898e-06, + "loss": 0.374, + "step": 970 + }, + { + "epoch": 1.93, + "grad_norm": 1.2151877880096436, + "learning_rate": 6.837730349050886e-06, + "loss": 0.3584, + "step": 980 + }, + { + "epoch": 1.94, + "grad_norm": 0.551629364490509, + "learning_rate": 6.7797341979656454e-06, + "loss": 0.2854, + "step": 990 + }, + { + "epoch": 1.96, + "grad_norm": 0.9264897108078003, + "learning_rate": 6.721462543759077e-06, + "loss": 0.4185, + "step": 1000 + }, + { + "epoch": 1.96, + "eval_loss": 0.35499048233032227, + "eval_runtime": 39.2232, + "eval_samples_per_second": 73.247, + "eval_steps_per_second": 2.295, + "step": 1000 + }, + { + "epoch": 1.98, + "grad_norm": 1.1430025100708008, + "learning_rate": 6.6629244068920155e-06, + "loss": 0.3806, + "step": 1010 + }, + { + "epoch": 2.0, + "grad_norm": 1.0283466577529907, + "learning_rate": 6.6041288490768385e-06, + "loss": 0.2947, + "step": 1020 + }, + { + "epoch": 2.02, + "grad_norm": 0.675986647605896, + "learning_rate": 6.545084971874738e-06, + "loss": 0.2732, + "step": 1030 + }, + { + "epoch": 2.04, + "grad_norm": 0.8484103083610535, + "learning_rate": 6.485801915286776e-06, + "loss": 0.2952, + "step": 1040 + }, + { + "epoch": 2.06, + "grad_norm": 0.9744128584861755, + "learning_rate": 6.426288856339022e-06, + "loss": 0.2251, + "step": 1050 + }, + { + "epoch": 2.08, + "grad_norm": 1.0789810419082642, + "learning_rate": 6.366555007661949e-06, + "loss": 0.2822, + "step": 1060 + }, + { + "epoch": 2.1, + "grad_norm": 0.9231953620910645, + "learning_rate": 6.306609616064304e-06, + "loss": 0.278, + "step": 1070 + }, + { + "epoch": 2.12, + "grad_norm": 1.3422801494598389, + "learning_rate": 6.246461961101721e-06, + "loss": 0.2213, + "step": 1080 + }, + { + "epoch": 2.14, + "grad_norm": 0.9151216745376587, + "learning_rate": 6.186121353640223e-06, + "loss": 0.3001, + "step": 1090 + }, + { + "epoch": 2.16, + "grad_norm": 1.156983494758606, + "learning_rate": 6.125597134414919e-06, + "loss": 0.3357, + "step": 1100 + }, + { + "epoch": 2.18, + "grad_norm": 1.0727161169052124, + "learning_rate": 6.064898672584036e-06, + "loss": 0.3637, + "step": 1110 + }, + { + "epoch": 2.2, + "grad_norm": 1.6018846035003662, + "learning_rate": 6.004035364278593e-06, + "loss": 0.4342, + "step": 1120 + }, + { + "epoch": 2.22, + "grad_norm": 1.2866791486740112, + "learning_rate": 5.94301663114786e-06, + "loss": 0.3534, + "step": 1130 + }, + { + "epoch": 2.24, + "grad_norm": 1.2443523406982422, + "learning_rate": 5.881851918900897e-06, + "loss": 0.271, + "step": 1140 + }, + { + "epoch": 2.26, + "grad_norm": 1.3801060914993286, + "learning_rate": 5.820550695844354e-06, + "loss": 0.3774, + "step": 1150 + }, + { + "epoch": 2.28, + "grad_norm": 1.0210528373718262, + "learning_rate": 5.759122451416769e-06, + "loss": 0.2967, + "step": 1160 + }, + { + "epoch": 2.3, + "grad_norm": 1.162329077720642, + "learning_rate": 5.697576694719616e-06, + "loss": 0.2556, + "step": 1170 + }, + { + "epoch": 2.32, + "grad_norm": 0.8746941685676575, + "learning_rate": 5.635922953045278e-06, + "loss": 0.3435, + "step": 1180 + }, + { + "epoch": 2.34, + "grad_norm": 1.5948892831802368, + "learning_rate": 5.574170770402236e-06, + "loss": 0.3064, + "step": 1190 + }, + { + "epoch": 2.36, + "grad_norm": 1.2417634725570679, + "learning_rate": 5.512329706037643e-06, + "loss": 0.3737, + "step": 1200 + }, + { + "epoch": 2.36, + "eval_loss": 0.34871676564216614, + "eval_runtime": 39.2425, + "eval_samples_per_second": 73.211, + "eval_steps_per_second": 2.293, + "step": 1200 + }, + { + "epoch": 2.38, + "grad_norm": 1.1724433898925781, + "learning_rate": 5.4504093329575546e-06, + "loss": 0.3051, + "step": 1210 + }, + { + "epoch": 2.4, + "grad_norm": 1.51445734500885, + "learning_rate": 5.388419236445033e-06, + "loss": 0.3564, + "step": 1220 + }, + { + "epoch": 2.42, + "grad_norm": 1.1924351453781128, + "learning_rate": 5.3263690125763316e-06, + "loss": 0.3243, + "step": 1230 + }, + { + "epoch": 2.44, + "grad_norm": 1.434472918510437, + "learning_rate": 5.264268266735432e-06, + "loss": 0.2923, + "step": 1240 + }, + { + "epoch": 2.46, + "grad_norm": 1.9453471899032593, + "learning_rate": 5.202126612127125e-06, + "loss": 0.3074, + "step": 1250 + }, + { + "epoch": 2.48, + "grad_norm": 1.517500877380371, + "learning_rate": 5.139953668288883e-06, + "loss": 0.3111, + "step": 1260 + }, + { + "epoch": 2.5, + "grad_norm": 2.1911709308624268, + "learning_rate": 5.077759059601756e-06, + "loss": 0.3539, + "step": 1270 + }, + { + "epoch": 2.51, + "grad_norm": 1.6386833190917969, + "learning_rate": 5.015552413800514e-06, + "loss": 0.2223, + "step": 1280 + }, + { + "epoch": 2.53, + "grad_norm": 0.7657915949821472, + "learning_rate": 4.953343360483281e-06, + "loss": 0.2714, + "step": 1290 + }, + { + "epoch": 2.55, + "grad_norm": 1.4364550113677979, + "learning_rate": 4.8911415296208555e-06, + "loss": 0.2568, + "step": 1300 + }, + { + "epoch": 2.57, + "grad_norm": 1.270623803138733, + "learning_rate": 4.828956550066006e-06, + "loss": 0.2363, + "step": 1310 + }, + { + "epoch": 2.59, + "grad_norm": 1.8665142059326172, + "learning_rate": 4.766798048062913e-06, + "loss": 0.4116, + "step": 1320 + }, + { + "epoch": 2.61, + "grad_norm": 1.2894102334976196, + "learning_rate": 4.704675645757028e-06, + "loss": 0.3128, + "step": 1330 + }, + { + "epoch": 2.63, + "grad_norm": 1.0780175924301147, + "learning_rate": 4.642598959705565e-06, + "loss": 0.2368, + "step": 1340 + }, + { + "epoch": 2.65, + "grad_norm": 0.9098504185676575, + "learning_rate": 4.580577599388854e-06, + "loss": 0.3453, + "step": 1350 + }, + { + "epoch": 2.67, + "grad_norm": 1.0072888135910034, + "learning_rate": 4.518621165722794e-06, + "loss": 0.2893, + "step": 1360 + }, + { + "epoch": 2.69, + "grad_norm": 0.721864640712738, + "learning_rate": 4.456739249572628e-06, + "loss": 0.3422, + "step": 1370 + }, + { + "epoch": 2.71, + "grad_norm": 1.9018405675888062, + "learning_rate": 4.394941430268275e-06, + "loss": 0.3076, + "step": 1380 + }, + { + "epoch": 2.73, + "grad_norm": 0.8948503732681274, + "learning_rate": 4.333237274121453e-06, + "loss": 0.3504, + "step": 1390 + }, + { + "epoch": 2.75, + "grad_norm": 2.278956413269043, + "learning_rate": 4.271636332944806e-06, + "loss": 0.2418, + "step": 1400 + }, + { + "epoch": 2.75, + "eval_loss": 0.3422459363937378, + "eval_runtime": 39.2204, + "eval_samples_per_second": 73.253, + "eval_steps_per_second": 2.295, + "step": 1400 + }, + { + "epoch": 2.77, + "grad_norm": 1.2129451036453247, + "learning_rate": 4.210148142573287e-06, + "loss": 0.3114, + "step": 1410 + }, + { + "epoch": 2.79, + "grad_norm": 1.1083780527114868, + "learning_rate": 4.148782221388007e-06, + "loss": 0.3683, + "step": 1420 + }, + { + "epoch": 2.81, + "grad_norm": 1.3645988702774048, + "learning_rate": 4.087548068842787e-06, + "loss": 0.3036, + "step": 1430 + }, + { + "epoch": 2.83, + "grad_norm": 0.9339216947555542, + "learning_rate": 4.026455163993645e-06, + "loss": 0.3015, + "step": 1440 + }, + { + "epoch": 2.85, + "grad_norm": 1.083004355430603, + "learning_rate": 3.965512964031435e-06, + "loss": 0.2794, + "step": 1450 + }, + { + "epoch": 2.87, + "grad_norm": 1.1303693056106567, + "learning_rate": 3.904730902817867e-06, + "loss": 0.2911, + "step": 1460 + }, + { + "epoch": 2.89, + "grad_norm": 1.3334907293319702, + "learning_rate": 3.844118389425154e-06, + "loss": 0.2511, + "step": 1470 + }, + { + "epoch": 2.91, + "grad_norm": 2.2911875247955322, + "learning_rate": 3.7836848066794763e-06, + "loss": 0.3791, + "step": 1480 + }, + { + "epoch": 2.93, + "grad_norm": 1.7196086645126343, + "learning_rate": 3.7234395097085298e-06, + "loss": 0.3396, + "step": 1490 + }, + { + "epoch": 2.95, + "grad_norm": 0.5811883211135864, + "learning_rate": 3.663391824493336e-06, + "loss": 0.1901, + "step": 1500 + }, + { + "epoch": 2.97, + "grad_norm": 2.1780526638031006, + "learning_rate": 3.6035510464245937e-06, + "loss": 0.3611, + "step": 1510 + }, + { + "epoch": 2.99, + "grad_norm": 2.1352715492248535, + "learning_rate": 3.5439264388637407e-06, + "loss": 0.2753, + "step": 1520 + }, + { + "epoch": 3.01, + "grad_norm": 0.9786732792854309, + "learning_rate": 3.4845272317089882e-06, + "loss": 0.2861, + "step": 1530 + }, + { + "epoch": 3.03, + "grad_norm": 1.4434765577316284, + "learning_rate": 3.4253626199665314e-06, + "loss": 0.2989, + "step": 1540 + }, + { + "epoch": 3.05, + "grad_norm": 2.0777690410614014, + "learning_rate": 3.3664417623271594e-06, + "loss": 0.3478, + "step": 1550 + }, + { + "epoch": 3.06, + "grad_norm": 1.3415099382400513, + "learning_rate": 3.3077737797484923e-06, + "loss": 0.2654, + "step": 1560 + }, + { + "epoch": 3.08, + "grad_norm": 0.7920995950698853, + "learning_rate": 3.249367754043047e-06, + "loss": 0.2063, + "step": 1570 + }, + { + "epoch": 3.1, + "grad_norm": 1.4506205320358276, + "learning_rate": 3.1912327264723843e-06, + "loss": 0.3329, + "step": 1580 + }, + { + "epoch": 3.12, + "grad_norm": 1.1375677585601807, + "learning_rate": 3.133377696347506e-06, + "loss": 0.3934, + "step": 1590 + }, + { + "epoch": 3.14, + "grad_norm": 1.2222238779067993, + "learning_rate": 3.075811619635774e-06, + "loss": 0.2528, + "step": 1600 + }, + { + "epoch": 3.14, + "eval_loss": 0.33900028467178345, + "eval_runtime": 39.2361, + "eval_samples_per_second": 73.223, + "eval_steps_per_second": 2.294, + "step": 1600 + }, + { + "epoch": 3.16, + "grad_norm": 1.0245050191879272, + "learning_rate": 3.0185434075745124e-06, + "loss": 0.247, + "step": 1610 + }, + { + "epoch": 3.18, + "grad_norm": 1.194373607635498, + "learning_rate": 2.961581925291557e-06, + "loss": 0.313, + "step": 1620 + }, + { + "epoch": 3.2, + "grad_norm": 1.9943459033966064, + "learning_rate": 2.9049359904329234e-06, + "loss": 0.3331, + "step": 1630 + }, + { + "epoch": 3.22, + "grad_norm": 1.5466620922088623, + "learning_rate": 2.8486143717978448e-06, + "loss": 0.2684, + "step": 1640 + }, + { + "epoch": 3.24, + "grad_norm": 1.1305476427078247, + "learning_rate": 2.792625787981349e-06, + "loss": 0.2743, + "step": 1650 + }, + { + "epoch": 3.26, + "grad_norm": 1.368371605873108, + "learning_rate": 2.736978906024626e-06, + "loss": 0.3512, + "step": 1660 + }, + { + "epoch": 3.28, + "grad_norm": 1.7816555500030518, + "learning_rate": 2.6816823400733628e-06, + "loss": 0.2919, + "step": 1670 + }, + { + "epoch": 3.3, + "grad_norm": 1.249085783958435, + "learning_rate": 2.626744650044284e-06, + "loss": 0.322, + "step": 1680 + }, + { + "epoch": 3.32, + "grad_norm": 1.6129003763198853, + "learning_rate": 2.572174340300061e-06, + "loss": 0.2349, + "step": 1690 + }, + { + "epoch": 3.34, + "grad_norm": 2.0825748443603516, + "learning_rate": 2.5179798583328415e-06, + "loss": 0.2368, + "step": 1700 + }, + { + "epoch": 3.36, + "grad_norm": 0.43531525135040283, + "learning_rate": 2.4641695934565827e-06, + "loss": 0.3711, + "step": 1710 + }, + { + "epoch": 3.38, + "grad_norm": 1.051423192024231, + "learning_rate": 2.410751875508373e-06, + "loss": 0.2323, + "step": 1720 + }, + { + "epoch": 3.4, + "grad_norm": 1.935787320137024, + "learning_rate": 2.3577349735589787e-06, + "loss": 0.3185, + "step": 1730 + }, + { + "epoch": 3.42, + "grad_norm": 1.971083641052246, + "learning_rate": 2.3051270946327887e-06, + "loss": 0.2479, + "step": 1740 + }, + { + "epoch": 3.44, + "grad_norm": 1.8540164232254028, + "learning_rate": 2.2529363824373707e-06, + "loss": 0.3738, + "step": 1750 + }, + { + "epoch": 3.46, + "grad_norm": 2.1510169506073, + "learning_rate": 2.2011709161028156e-06, + "loss": 0.3481, + "step": 1760 + }, + { + "epoch": 3.48, + "grad_norm": 1.3573346138000488, + "learning_rate": 2.149838708931087e-06, + "loss": 0.2863, + "step": 1770 + }, + { + "epoch": 3.5, + "grad_norm": 0.8732242584228516, + "learning_rate": 2.0989477071555757e-06, + "loss": 0.2415, + "step": 1780 + }, + { + "epoch": 3.52, + "grad_norm": 1.0924676656723022, + "learning_rate": 2.0485057887110026e-06, + "loss": 0.2989, + "step": 1790 + }, + { + "epoch": 3.54, + "grad_norm": 1.789581298828125, + "learning_rate": 1.998520762013923e-06, + "loss": 0.3192, + "step": 1800 + }, + { + "epoch": 3.54, + "eval_loss": 0.33926254510879517, + "eval_runtime": 39.2683, + "eval_samples_per_second": 73.163, + "eval_steps_per_second": 2.292, + "step": 1800 + }, + { + "epoch": 3.56, + "grad_norm": 1.4803309440612793, + "learning_rate": 1.949000364753985e-06, + "loss": 0.2484, + "step": 1810 + }, + { + "epoch": 3.58, + "grad_norm": 2.3573765754699707, + "learning_rate": 1.8999522626961254e-06, + "loss": 0.2761, + "step": 1820 + }, + { + "epoch": 3.6, + "grad_norm": 1.3212318420410156, + "learning_rate": 1.851384048493914e-06, + "loss": 0.2972, + "step": 1830 + }, + { + "epoch": 3.61, + "grad_norm": 1.6966694593429565, + "learning_rate": 1.8033032405142075e-06, + "loss": 0.2279, + "step": 1840 + }, + { + "epoch": 3.63, + "grad_norm": 1.9742337465286255, + "learning_rate": 1.7557172816733104e-06, + "loss": 0.2662, + "step": 1850 + }, + { + "epoch": 3.65, + "grad_norm": 1.3778648376464844, + "learning_rate": 1.7086335382848012e-06, + "loss": 0.3054, + "step": 1860 + }, + { + "epoch": 3.67, + "grad_norm": 1.8162543773651123, + "learning_rate": 1.6620592989192318e-06, + "loss": 0.3051, + "step": 1870 + }, + { + "epoch": 3.69, + "grad_norm": 1.5290098190307617, + "learning_rate": 1.6160017732758588e-06, + "loss": 0.2765, + "step": 1880 + }, + { + "epoch": 3.71, + "grad_norm": 1.4022248983383179, + "learning_rate": 1.5704680910665721e-06, + "loss": 0.2839, + "step": 1890 + }, + { + "epoch": 3.73, + "grad_norm": 1.5009647607803345, + "learning_rate": 1.5254653009122206e-06, + "loss": 0.2527, + "step": 1900 + }, + { + "epoch": 3.75, + "grad_norm": 1.65267813205719, + "learning_rate": 1.4810003692514829e-06, + "loss": 0.2268, + "step": 1910 + }, + { + "epoch": 3.77, + "grad_norm": 1.7568740844726562, + "learning_rate": 1.4370801792624656e-06, + "loss": 0.3011, + "step": 1920 + }, + { + "epoch": 3.79, + "grad_norm": 1.2601398229599, + "learning_rate": 1.3937115297971788e-06, + "loss": 0.2938, + "step": 1930 + }, + { + "epoch": 3.81, + "grad_norm": 1.4082847833633423, + "learning_rate": 1.3509011343290767e-06, + "loss": 0.2375, + "step": 1940 + }, + { + "epoch": 3.83, + "grad_norm": 1.3604422807693481, + "learning_rate": 1.3086556199138117e-06, + "loss": 0.2748, + "step": 1950 + }, + { + "epoch": 3.85, + "grad_norm": 1.3298903703689575, + "learning_rate": 1.2669815261633666e-06, + "loss": 0.2703, + "step": 1960 + }, + { + "epoch": 3.87, + "grad_norm": 1.3025224208831787, + "learning_rate": 1.225885304233716e-06, + "loss": 0.2331, + "step": 1970 + }, + { + "epoch": 3.89, + "grad_norm": 0.9569131135940552, + "learning_rate": 1.1853733158261894e-06, + "loss": 0.3299, + "step": 1980 + }, + { + "epoch": 3.91, + "grad_norm": 1.0660338401794434, + "learning_rate": 1.1454518322026836e-06, + "loss": 0.2159, + "step": 1990 + }, + { + "epoch": 3.93, + "grad_norm": 0.9487476944923401, + "learning_rate": 1.1061270332148626e-06, + "loss": 0.2834, + "step": 2000 + }, + { + "epoch": 3.93, + "eval_loss": 0.3370112180709839, + "eval_runtime": 39.2536, + "eval_samples_per_second": 73.191, + "eval_steps_per_second": 2.293, + "step": 2000 + }, + { + "epoch": 3.95, + "grad_norm": 0.9943105578422546, + "learning_rate": 1.06740500634752e-06, + "loss": 0.2315, + "step": 2010 + }, + { + "epoch": 3.97, + "grad_norm": 1.6915420293807983, + "learning_rate": 1.0292917457762325e-06, + "loss": 0.32, + "step": 2020 + }, + { + "epoch": 3.99, + "grad_norm": 1.3354073762893677, + "learning_rate": 9.917931514394674e-07, + "loss": 0.3086, + "step": 2030 + }, + { + "epoch": 4.01, + "grad_norm": 1.4191135168075562, + "learning_rate": 9.549150281252633e-07, + "loss": 0.327, + "step": 2040 + }, + { + "epoch": 4.03, + "grad_norm": 1.4209463596343994, + "learning_rate": 9.186630845726497e-07, + "loss": 0.2789, + "step": 2050 + }, + { + "epoch": 4.05, + "grad_norm": 1.3217524290084839, + "learning_rate": 8.830429325879397e-07, + "loss": 0.305, + "step": 2060 + }, + { + "epoch": 4.07, + "grad_norm": 0.6172815561294556, + "learning_rate": 8.480600861760124e-07, + "loss": 0.2732, + "step": 2070 + }, + { + "epoch": 4.09, + "grad_norm": 0.9870301485061646, + "learning_rate": 8.137199606867507e-07, + "loss": 0.2897, + "step": 2080 + }, + { + "epoch": 4.11, + "grad_norm": 0.8269962072372437, + "learning_rate": 7.800278719767407e-07, + "loss": 0.3181, + "step": 2090 + }, + { + "epoch": 4.13, + "grad_norm": 0.7305634021759033, + "learning_rate": 7.469890355863785e-07, + "loss": 0.2144, + "step": 2100 + }, + { + "epoch": 4.15, + "grad_norm": 0.5976698398590088, + "learning_rate": 7.146085659325025e-07, + "loss": 0.294, + "step": 2110 + }, + { + "epoch": 4.17, + "grad_norm": 1.456742763519287, + "learning_rate": 6.828914755166826e-07, + "loss": 0.2729, + "step": 2120 + }, + { + "epoch": 4.18, + "grad_norm": 1.1731284856796265, + "learning_rate": 6.518426741492823e-07, + "loss": 0.2509, + "step": 2130 + }, + { + "epoch": 4.2, + "grad_norm": 1.3907181024551392, + "learning_rate": 6.214669681894181e-07, + "loss": 0.2271, + "step": 2140 + }, + { + "epoch": 4.22, + "grad_norm": 1.0336577892303467, + "learning_rate": 5.917690598009385e-07, + "loss": 0.2703, + "step": 2150 + }, + { + "epoch": 4.24, + "grad_norm": 0.9870587587356567, + "learning_rate": 5.62753546224526e-07, + "loss": 0.2339, + "step": 2160 + }, + { + "epoch": 4.26, + "grad_norm": 1.3517531156539917, + "learning_rate": 5.344249190660427e-07, + "loss": 0.3029, + "step": 2170 + }, + { + "epoch": 4.28, + "grad_norm": 1.3691924810409546, + "learning_rate": 5.067875636012304e-07, + "loss": 0.3205, + "step": 2180 + }, + { + "epoch": 4.3, + "grad_norm": 1.9304887056350708, + "learning_rate": 4.798457580968674e-07, + "loss": 0.2875, + "step": 2190 + }, + { + "epoch": 4.32, + "grad_norm": 1.0140875577926636, + "learning_rate": 4.536036731484972e-07, + "loss": 0.3612, + "step": 2200 + }, + { + "epoch": 4.32, + "eval_loss": 0.337531715631485, + "eval_runtime": 39.2368, + "eval_samples_per_second": 73.222, + "eval_steps_per_second": 2.294, + "step": 2200 + }, + { + "epoch": 4.34, + "grad_norm": 2.371439218521118, + "learning_rate": 4.280653710348126e-07, + "loss": 0.2825, + "step": 2210 + }, + { + "epoch": 4.36, + "grad_norm": 1.9617916345596313, + "learning_rate": 4.03234805088818e-07, + "loss": 0.323, + "step": 2220 + }, + { + "epoch": 4.38, + "grad_norm": 1.9243499040603638, + "learning_rate": 3.7911581908585626e-07, + "loss": 0.2784, + "step": 2230 + }, + { + "epoch": 4.4, + "grad_norm": 1.2725778818130493, + "learning_rate": 3.5571214664858356e-07, + "loss": 0.2632, + "step": 2240 + }, + { + "epoch": 4.42, + "grad_norm": 1.0478254556655884, + "learning_rate": 3.330274106690112e-07, + "loss": 0.2192, + "step": 2250 + }, + { + "epoch": 4.44, + "grad_norm": 0.705704927444458, + "learning_rate": 3.110651227476774e-07, + "loss": 0.2647, + "step": 2260 + }, + { + "epoch": 4.46, + "grad_norm": 1.8936996459960938, + "learning_rate": 2.8982868265005457e-07, + "loss": 0.2897, + "step": 2270 + }, + { + "epoch": 4.48, + "grad_norm": 1.3405539989471436, + "learning_rate": 2.693213777802606e-07, + "loss": 0.3023, + "step": 2280 + }, + { + "epoch": 4.5, + "grad_norm": 1.2517961263656616, + "learning_rate": 2.495463826721711e-07, + "loss": 0.2419, + "step": 2290 + }, + { + "epoch": 4.52, + "grad_norm": 2.112408399581909, + "learning_rate": 2.305067584980014e-07, + "loss": 0.3104, + "step": 2300 + }, + { + "epoch": 4.54, + "grad_norm": 1.2434989213943481, + "learning_rate": 2.1220545259443558e-07, + "loss": 0.328, + "step": 2310 + }, + { + "epoch": 4.56, + "grad_norm": 1.478872299194336, + "learning_rate": 1.9464529800637731e-07, + "loss": 0.307, + "step": 2320 + }, + { + "epoch": 4.58, + "grad_norm": 1.822096347808838, + "learning_rate": 1.7782901304839617e-07, + "loss": 0.2437, + "step": 2330 + }, + { + "epoch": 4.6, + "grad_norm": 1.8473854064941406, + "learning_rate": 1.617592008839308e-07, + "loss": 0.2862, + "step": 2340 + }, + { + "epoch": 4.62, + "grad_norm": 1.2884219884872437, + "learning_rate": 1.4643834912232035e-07, + "loss": 0.2901, + "step": 2350 + }, + { + "epoch": 4.64, + "grad_norm": 1.3200654983520508, + "learning_rate": 1.3186882943371892e-07, + "loss": 0.2482, + "step": 2360 + }, + { + "epoch": 4.66, + "grad_norm": 1.8138701915740967, + "learning_rate": 1.1805289718196499e-07, + "loss": 0.2957, + "step": 2370 + }, + { + "epoch": 4.68, + "grad_norm": 1.3670308589935303, + "learning_rate": 1.0499269107544674e-07, + "loss": 0.3179, + "step": 2380 + }, + { + "epoch": 4.7, + "grad_norm": 1.4334827661514282, + "learning_rate": 9.269023283603295e-08, + "loss": 0.277, + "step": 2390 + }, + { + "epoch": 4.72, + "grad_norm": 1.5253217220306396, + "learning_rate": 8.114742688610789e-08, + "loss": 0.2732, + "step": 2400 + }, + { + "epoch": 4.72, + "eval_loss": 0.33687904477119446, + "eval_runtime": 39.2327, + "eval_samples_per_second": 73.23, + "eval_steps_per_second": 2.294, + "step": 2400 + }, + { + "epoch": 4.73, + "grad_norm": 1.0200579166412354, + "learning_rate": 7.036606005376701e-08, + "loss": 0.2598, + "step": 2410 + }, + { + "epoch": 4.75, + "grad_norm": 1.607457160949707, + "learning_rate": 6.034780129621664e-08, + "loss": 0.2599, + "step": 2420 + }, + { + "epoch": 4.77, + "grad_norm": 1.8155800104141235, + "learning_rate": 5.1094201441417855e-08, + "loss": 0.2699, + "step": 2430 + }, + { + "epoch": 4.79, + "grad_norm": 0.9994600415229797, + "learning_rate": 4.260669294801956e-08, + "loss": 0.2819, + "step": 2440 + }, + { + "epoch": 4.81, + "grad_norm": 2.0200681686401367, + "learning_rate": 3.488658968361147e-08, + "loss": 0.2666, + "step": 2450 + }, + { + "epoch": 4.83, + "grad_norm": 1.5585851669311523, + "learning_rate": 2.79350867213396e-08, + "loss": 0.2418, + "step": 2460 + }, + { + "epoch": 4.85, + "grad_norm": 1.5332939624786377, + "learning_rate": 2.1753260154906973e-08, + "loss": 0.1971, + "step": 2470 + }, + { + "epoch": 4.87, + "grad_norm": 2.014511823654175, + "learning_rate": 1.6342066931995804e-08, + "loss": 0.3358, + "step": 2480 + }, + { + "epoch": 4.89, + "grad_norm": 1.5106168985366821, + "learning_rate": 1.1702344706129298e-08, + "loss": 0.2409, + "step": 2490 + }, + { + "epoch": 4.91, + "grad_norm": 1.561906337738037, + "learning_rate": 7.834811707005952e-09, + "loss": 0.2549, + "step": 2500 + }, + { + "epoch": 4.93, + "grad_norm": 1.239657998085022, + "learning_rate": 4.740066629315143e-09, + "loss": 0.2957, + "step": 2510 + }, + { + "epoch": 4.95, + "grad_norm": 1.6581403017044067, + "learning_rate": 2.4185885400596076e-09, + "loss": 0.265, + "step": 2520 + }, + { + "epoch": 4.97, + "grad_norm": 1.327383279800415, + "learning_rate": 8.707368043975318e-10, + "loss": 0.2817, + "step": 2530 + }, + { + "epoch": 4.99, + "grad_norm": 1.1941030025482178, + "learning_rate": 9.675103000872377e-11, + "loss": 0.2591, + "step": 2540 + }, + { + "epoch": 5.0, + "step": 2545, + "total_flos": 1.1692495178962043e+18, + "train_loss": 0.3501229747105207, + "train_runtime": 3965.184, + "train_samples_per_second": 20.526, + "train_steps_per_second": 0.642 + } + ], + "logging_steps": 10, + "max_steps": 2545, + "num_input_tokens_seen": 0, + "num_train_epochs": 5, + "save_steps": 1000, + "total_flos": 1.1692495178962043e+18, + "train_batch_size": 16, + "trial_name": null, + "trial_params": null +} diff --git a/llama2_13b_peft/unit_conversion/training_args.bin b/llama2_13b_peft/unit_conversion/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..11796cc9d020e6f2ef8a8c9f70558557b6567a57 --- /dev/null +++ b/llama2_13b_peft/unit_conversion/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:94dbc1a54d5022eafa983608a03720d70f2b0138f9618bcd0f3687c51efaaa6c +size 5112 diff --git a/llama2_13b_peft/unit_conversion/training_eval_loss.png b/llama2_13b_peft/unit_conversion/training_eval_loss.png new file mode 100644 index 0000000000000000000000000000000000000000..aa7a09f258caf41fb2b8978994441b1bd0856703 Binary files /dev/null and b/llama2_13b_peft/unit_conversion/training_eval_loss.png differ diff --git a/llama2_13b_peft/unit_conversion/training_loss.png b/llama2_13b_peft/unit_conversion/training_loss.png new file mode 100644 index 0000000000000000000000000000000000000000..97803724eaa94fd2971fd2b69f46568dfd5e3f92 Binary files /dev/null and b/llama2_13b_peft/unit_conversion/training_loss.png differ diff --git a/llama2_13b_peft/winowhy/README.md b/llama2_13b_peft/winowhy/README.md new file mode 100644 index 0000000000000000000000000000000000000000..db2e51adfbc94385dd649b3960b6a4af9a4ebfb7 --- /dev/null +++ b/llama2_13b_peft/winowhy/README.md @@ -0,0 +1,71 @@ +--- +license: other +library_name: peft +tags: +- llama-factory +- lora +- generated_from_trainer +base_model: /data1/model/llama2/meta-llama/Llama2-13b +model-index: +- name: winowhy_no_sys + results: [] +--- + + + +# winowhy_no_sys + +This model is a fine-tuned version of [/data1/model/llama2/meta-llama/Llama2-13b](https://huggingface.co//data1/model/llama2/meta-llama/Llama2-13b) on the winowhy_no_sys dataset. +It achieves the following results on the evaluation set: +- Loss: 0.1856 + +## Model description + +More information needed + +## Intended uses & limitations + +More information needed + +## Training and evaluation data + +More information needed + +## Training procedure + +### Training hyperparameters + +The following hyperparameters were used during training: +- learning_rate: 5e-05 +- train_batch_size: 4 +- eval_batch_size: 4 +- seed: 42 +- distributed_type: multi-GPU +- num_devices: 2 +- total_train_batch_size: 8 +- total_eval_batch_size: 8 +- optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08 +- lr_scheduler_type: cosine +- lr_scheduler_warmup_steps: 20 +- num_epochs: 5.0 + +### Training results + +| Training Loss | Epoch | Step | Validation Loss | +|:-------------:|:------:|:----:|:---------------:| +| 0.3041 | 0.3676 | 100 | 0.3208 | +| 0.2575 | 0.7353 | 200 | 0.2202 | +| 0.2475 | 1.1029 | 300 | 0.1856 | +| 0.1415 | 1.4706 | 400 | 0.2149 | +| 0.1281 | 1.8382 | 500 | 0.1950 | +| 0.173 | 2.2059 | 600 | 0.2180 | + + +### Framework versions + +- PEFT 0.10.0 +- Transformers 4.40.0 +- Pytorch 2.2.1 +- Datasets 2.18.0 +- Tokenizers 0.19.1 \ No newline at end of file diff --git a/llama2_13b_peft/winowhy/adapter_config.json b/llama2_13b_peft/winowhy/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..cee9a588ae2800a17e2d229a92b4c336eb0d9393 --- /dev/null +++ b/llama2_13b_peft/winowhy/adapter_config.json @@ -0,0 +1,34 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "/data1/model/llama2/meta-llama/Llama2-13b", + "bias": "none", + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 16, + "lora_dropout": 0.0, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "r": 8, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "o_proj", + "gate_proj", + "v_proj", + "k_proj", + "down_proj", + "up_proj", + "q_proj" + ], + "task_type": "CAUSAL_LM", + "use_dora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/llama2_13b_peft/winowhy/adapter_model.safetensors b/llama2_13b_peft/winowhy/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..8ae312d59c10925229913da6d80ab8bc19bba09f --- /dev/null +++ b/llama2_13b_peft/winowhy/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:18f5b712225a1b176def73f49c4421f30a1665020adde7a32de725cc79f1b3d7 +size 125248064 diff --git a/llama2_13b_peft/winowhy/all_results.json b/llama2_13b_peft/winowhy/all_results.json new file mode 100644 index 0000000000000000000000000000000000000000..18a839613fce6cc63b65c491f4a0f4ef79a5e9c0 --- /dev/null +++ b/llama2_13b_peft/winowhy/all_results.json @@ -0,0 +1,12 @@ +{ + "epoch": 2.2058823529411766, + "eval_loss": 0.18564413487911224, + "eval_runtime": 1.2133, + "eval_samples_per_second": 94.784, + "eval_steps_per_second": 12.363, + "total_flos": 3.440278732852429e+16, + "train_loss": 0.47969158987204236, + "train_runtime": 190.8837, + "train_samples_per_second": 56.972, + "train_steps_per_second": 7.125 +} \ No newline at end of file diff --git a/llama2_13b_peft/winowhy/eval_results.json b/llama2_13b_peft/winowhy/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..e3608cbe152875d9891f75bf0bb0dfae2aa2bb16 --- /dev/null +++ b/llama2_13b_peft/winowhy/eval_results.json @@ -0,0 +1,7 @@ +{ + "epoch": 2.2058823529411766, + "eval_loss": 0.18564413487911224, + "eval_runtime": 1.2133, + "eval_samples_per_second": 94.784, + "eval_steps_per_second": 12.363 +} \ No newline at end of file diff --git a/llama2_13b_peft/winowhy/special_tokens_map.json b/llama2_13b_peft/winowhy/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..72ecfeeb7e14d244c936169d2ed139eeae235ef1 --- /dev/null +++ b/llama2_13b_peft/winowhy/special_tokens_map.json @@ -0,0 +1,24 @@ +{ + "bos_token": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": "", + "unk_token": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + } +} diff --git a/llama2_13b_peft/winowhy/tokenizer.model b/llama2_13b_peft/winowhy/tokenizer.model new file mode 100644 index 0000000000000000000000000000000000000000..6c00c742ce03c627d6cd5b795984876fa49fa899 --- /dev/null +++ b/llama2_13b_peft/winowhy/tokenizer.model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9e556afd44213b6bd1be2b850ebbbd98f5481437a8021afaf58ee7fb1818d347 +size 499723 diff --git a/llama2_13b_peft/winowhy/tokenizer_config.json b/llama2_13b_peft/winowhy/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..a40266f39e5b5fed14de34710d35eb9e98d6bdad --- /dev/null +++ b/llama2_13b_peft/winowhy/tokenizer_config.json @@ -0,0 +1,45 @@ +{ + "add_bos_token": true, + "add_eos_token": false, + "add_prefix_space": true, + "added_tokens_decoder": { + "0": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "1": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "2": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + } + }, + "bos_token": "", + "chat_template": "{% if messages[0]['role'] == 'system' %}{% set system_message = messages[0]['content'] %}{% endif %}{% if system_message is defined %}{{ system_message }}{% endif %}{% for message in messages %}{% set content = message['content'] %}{% if message['role'] == 'user' %}{{ content }}{% elif message['role'] == 'assistant' %}{{ content + '\\n' }}{% endif %}{% endfor %}", + "clean_up_tokenization_spaces": false, + "eos_token": "", + "legacy": true, + "model_max_length": 1000000000000000019884624838656, + "pad_token": "", + "padding_side": "right", + "sp_model_kwargs": {}, + "spaces_between_special_tokens": false, + "split_special_tokens": false, + "tokenizer_class": "LlamaTokenizer", + "unk_token": "", + "use_default_system_prompt": false +} diff --git a/llama2_13b_peft/winowhy/train_results.json b/llama2_13b_peft/winowhy/train_results.json new file mode 100644 index 0000000000000000000000000000000000000000..5b9791f01f8da5a55828b26a9c74b5574719fa2b --- /dev/null +++ b/llama2_13b_peft/winowhy/train_results.json @@ -0,0 +1,8 @@ +{ + "epoch": 2.2058823529411766, + "total_flos": 3.440278732852429e+16, + "train_loss": 0.47969158987204236, + "train_runtime": 190.8837, + "train_samples_per_second": 56.972, + "train_steps_per_second": 7.125 +} \ No newline at end of file diff --git a/llama2_13b_peft/winowhy/trainer_log.jsonl b/llama2_13b_peft/winowhy/trainer_log.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..d4e6df6a4b6c5596e2b5db96842db7da870d8eaf --- /dev/null +++ b/llama2_13b_peft/winowhy/trainer_log.jsonl @@ -0,0 +1,68 @@ +{"current_steps": 10, "total_steps": 1360, "loss": 9.0197, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 2.5e-05, "epoch": 0.03676470588235294, "percentage": 0.74, "elapsed_time": "0:00:04", "remaining_time": "0:09:48"} +{"current_steps": 20, "total_steps": 1360, "loss": 6.0796, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 5e-05, "epoch": 0.07352941176470588, "percentage": 1.47, "elapsed_time": "0:00:07", "remaining_time": "0:08:03"} +{"current_steps": 30, "total_steps": 1360, "loss": 0.7866, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.99931296277454e-05, "epoch": 0.11029411764705882, "percentage": 2.21, "elapsed_time": "0:00:10", "remaining_time": "0:07:25"} +{"current_steps": 40, "total_steps": 1360, "loss": 0.3572, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.997252228714279e-05, "epoch": 0.14705882352941177, "percentage": 2.94, "elapsed_time": "0:00:12", "remaining_time": "0:07:05"} +{"current_steps": 50, "total_steps": 1360, "loss": 0.3669, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.993818930460026e-05, "epoch": 0.18382352941176472, "percentage": 3.68, "elapsed_time": "0:00:15", "remaining_time": "0:06:52"} +{"current_steps": 60, "total_steps": 1360, "loss": 0.364, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.9890149550547454e-05, "epoch": 0.22058823529411764, "percentage": 4.41, "elapsed_time": "0:00:18", "remaining_time": "0:06:41"} +{"current_steps": 70, "total_steps": 1360, "loss": 0.3211, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.982842942906386e-05, "epoch": 0.25735294117647056, "percentage": 5.15, "elapsed_time": "0:00:21", "remaining_time": "0:06:34"} +{"current_steps": 80, "total_steps": 1360, "loss": 0.3052, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.9753062863366276e-05, "epoch": 0.29411764705882354, "percentage": 5.88, "elapsed_time": "0:00:24", "remaining_time": "0:06:27"} +{"current_steps": 90, "total_steps": 1360, "loss": 0.3446, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.966409127716367e-05, "epoch": 0.33088235294117646, "percentage": 6.62, "elapsed_time": "0:00:26", "remaining_time": "0:06:20"} +{"current_steps": 100, "total_steps": 1360, "loss": 0.3041, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.95615635718894e-05, "epoch": 0.36764705882352944, "percentage": 7.35, "elapsed_time": "0:00:29", "remaining_time": "0:06:15"} +{"current_steps": 100, "total_steps": 1360, "loss": null, "eval_loss": 0.3207797408103943, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": null, "epoch": 0.36764705882352944, "percentage": 7.35, "elapsed_time": "0:00:29", "remaining_time": "0:06:15"} +{"current_steps": 110, "total_steps": 1360, "loss": 0.2438, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.944553609982363e-05, "epoch": 0.40441176470588236, "percentage": 8.09, "elapsed_time": "0:00:34", "remaining_time": "0:06:28"} +{"current_steps": 120, "total_steps": 1360, "loss": 0.3688, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.931607263312032e-05, "epoch": 0.4411764705882353, "percentage": 8.82, "elapsed_time": "0:00:37", "remaining_time": "0:06:22"} +{"current_steps": 130, "total_steps": 1360, "loss": 0.2945, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.917324432875627e-05, "epoch": 0.47794117647058826, "percentage": 9.56, "elapsed_time": "0:00:39", "remaining_time": "0:06:16"} +{"current_steps": 140, "total_steps": 1360, "loss": 0.2764, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.9017129689421e-05, "epoch": 0.5147058823529411, "percentage": 10.29, "elapsed_time": "0:00:42", "remaining_time": "0:06:11"} +{"current_steps": 150, "total_steps": 1360, "loss": 0.3507, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.8847814520369475e-05, "epoch": 0.5514705882352942, "percentage": 11.03, "elapsed_time": "0:00:45", "remaining_time": "0:06:06"} +{"current_steps": 160, "total_steps": 1360, "loss": 0.2592, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.8665391882260856e-05, "epoch": 0.5882352941176471, "percentage": 11.76, "elapsed_time": "0:00:48", "remaining_time": "0:06:02"} +{"current_steps": 170, "total_steps": 1360, "loss": 0.2624, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.846996204000967e-05, "epoch": 0.625, "percentage": 12.5, "elapsed_time": "0:00:51", "remaining_time": "0:05:57"} +{"current_steps": 180, "total_steps": 1360, "loss": 0.373, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.8261632407677174e-05, "epoch": 0.6617647058823529, "percentage": 13.24, "elapsed_time": "0:00:53", "remaining_time": "0:05:53"} +{"current_steps": 190, "total_steps": 1360, "loss": 0.2961, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.804051748943343e-05, "epoch": 0.6985294117647058, "percentage": 13.97, "elapsed_time": "0:00:56", "remaining_time": "0:05:49"} +{"current_steps": 200, "total_steps": 1360, "loss": 0.2575, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.780673881662242e-05, "epoch": 0.7352941176470589, "percentage": 14.71, "elapsed_time": "0:00:59", "remaining_time": "0:05:45"} +{"current_steps": 200, "total_steps": 1360, "loss": null, "eval_loss": 0.22019265592098236, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": null, "epoch": 0.7352941176470589, "percentage": 14.71, "elapsed_time": "0:00:59", "remaining_time": "0:05:45"} +{"current_steps": 210, "total_steps": 1360, "loss": 0.2374, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.756042488096471e-05, "epoch": 0.7720588235294118, "percentage": 15.44, "elapsed_time": "0:01:03", "remaining_time": "0:05:50"} +{"current_steps": 220, "total_steps": 1360, "loss": 0.2592, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.730171106393466e-05, "epoch": 0.8088235294117647, "percentage": 16.18, "elapsed_time": "0:01:06", "remaining_time": "0:05:45"} +{"current_steps": 230, "total_steps": 1360, "loss": 0.2067, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.7030739562350713e-05, "epoch": 0.8455882352941176, "percentage": 16.91, "elapsed_time": "0:01:09", "remaining_time": "0:05:41"} +{"current_steps": 240, "total_steps": 1360, "loss": 0.2181, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.674765931021976e-05, "epoch": 0.8823529411764706, "percentage": 17.65, "elapsed_time": "0:01:12", "remaining_time": "0:05:37"} +{"current_steps": 250, "total_steps": 1360, "loss": 0.2213, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.645262589687861e-05, "epoch": 0.9191176470588235, "percentage": 18.38, "elapsed_time": "0:01:15", "remaining_time": "0:05:33"} +{"current_steps": 260, "total_steps": 1360, "loss": 0.2569, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.614580148147744e-05, "epoch": 0.9558823529411765, "percentage": 19.12, "elapsed_time": "0:01:17", "remaining_time": "0:05:29"} +{"current_steps": 270, "total_steps": 1360, "loss": 0.1528, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.582735470385229e-05, "epoch": 0.9926470588235294, "percentage": 19.85, "elapsed_time": "0:01:20", "remaining_time": "0:05:26"} +{"current_steps": 280, "total_steps": 1360, "loss": 0.263, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.5497460591835615e-05, "epoch": 1.0294117647058822, "percentage": 20.59, "elapsed_time": "0:01:23", "remaining_time": "0:05:22"} +{"current_steps": 290, "total_steps": 1360, "loss": 0.1773, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.515630046505575e-05, "epoch": 1.0661764705882353, "percentage": 21.32, "elapsed_time": "0:01:26", "remaining_time": "0:05:19"} +{"current_steps": 300, "total_steps": 1360, "loss": 0.2475, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.480406183527823e-05, "epoch": 1.1029411764705883, "percentage": 22.06, "elapsed_time": "0:01:29", "remaining_time": "0:05:15"} +{"current_steps": 300, "total_steps": 1360, "loss": null, "eval_loss": 0.18564413487911224, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": null, "epoch": 1.1029411764705883, "percentage": 22.06, "elapsed_time": "0:01:29", "remaining_time": "0:05:15"} +{"current_steps": 310, "total_steps": 1360, "loss": 0.1195, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.4440938303343804e-05, "epoch": 1.1397058823529411, "percentage": 22.79, "elapsed_time": "0:01:33", "remaining_time": "0:05:17"} +{"current_steps": 320, "total_steps": 1360, "loss": 0.2049, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.406712945275955e-05, "epoch": 1.1764705882352942, "percentage": 23.53, "elapsed_time": "0:01:36", "remaining_time": "0:05:13"} +{"current_steps": 330, "total_steps": 1360, "loss": 0.159, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.368284074000193e-05, "epoch": 1.213235294117647, "percentage": 24.26, "elapsed_time": "0:01:39", "remaining_time": "0:05:10"} +{"current_steps": 340, "total_steps": 1360, "loss": 0.2263, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.328828338159173e-05, "epoch": 1.25, "percentage": 25.0, "elapsed_time": "0:01:42", "remaining_time": "0:05:06"} +{"current_steps": 350, "total_steps": 1360, "loss": 0.2505, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.288367423800319e-05, "epoch": 1.2867647058823528, "percentage": 25.74, "elapsed_time": "0:01:44", "remaining_time": "0:05:02"} +{"current_steps": 360, "total_steps": 1360, "loss": 0.1879, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.2469235694471043e-05, "epoch": 1.3235294117647058, "percentage": 26.47, "elapsed_time": "0:01:47", "remaining_time": "0:04:59"} +{"current_steps": 370, "total_steps": 1360, "loss": 0.1437, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.204519553876095e-05, "epoch": 1.3602941176470589, "percentage": 27.21, "elapsed_time": "0:01:50", "remaining_time": "0:04:55"} +{"current_steps": 380, "total_steps": 1360, "loss": 0.18, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.161178683597054e-05, "epoch": 1.3970588235294117, "percentage": 27.94, "elapsed_time": "0:01:53", "remaining_time": "0:04:52"} +{"current_steps": 390, "total_steps": 1360, "loss": 0.2727, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.116924780042997e-05, "epoch": 1.4338235294117647, "percentage": 28.68, "elapsed_time": "0:01:56", "remaining_time": "0:04:48"} +{"current_steps": 400, "total_steps": 1360, "loss": 0.1415, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.071782166477213e-05, "epoch": 1.4705882352941178, "percentage": 29.41, "elapsed_time": "0:01:58", "remaining_time": "0:04:45"} +{"current_steps": 400, "total_steps": 1360, "loss": null, "eval_loss": 0.2149094045162201, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": null, "epoch": 1.4705882352941178, "percentage": 29.41, "elapsed_time": "0:01:58", "remaining_time": "0:04:45"} +{"current_steps": 410, "total_steps": 1360, "loss": 0.1996, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.0257756546244804e-05, "epoch": 1.5073529411764706, "percentage": 30.15, "elapsed_time": "0:02:03", "remaining_time": "0:04:45"} +{"current_steps": 420, "total_steps": 1360, "loss": 0.2265, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 3.978930531033807e-05, "epoch": 1.5441176470588234, "percentage": 30.88, "elapsed_time": "0:02:06", "remaining_time": "0:04:42"} +{"current_steps": 430, "total_steps": 1360, "loss": 0.1411, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 3.93127254318018e-05, "epoch": 1.5808823529411766, "percentage": 31.62, "elapsed_time": "0:02:08", "remaining_time": "0:04:38"} +{"current_steps": 440, "total_steps": 1360, "loss": 0.2201, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 3.882827885312999e-05, "epoch": 1.6176470588235294, "percentage": 32.35, "elapsed_time": "0:02:11", "remaining_time": "0:04:35"} +{"current_steps": 450, "total_steps": 1360, "loss": 0.1829, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 3.833623184058926e-05, "epoch": 1.6544117647058822, "percentage": 33.09, "elapsed_time": "0:02:14", "remaining_time": "0:04:32"} +{"current_steps": 460, "total_steps": 1360, "loss": 0.1413, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 3.783685483787105e-05, "epoch": 1.6911764705882353, "percentage": 33.82, "elapsed_time": "0:02:17", "remaining_time": "0:04:28"} +{"current_steps": 470, "total_steps": 1360, "loss": 0.1551, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 3.7330422317447685e-05, "epoch": 1.7279411764705883, "percentage": 34.56, "elapsed_time": "0:02:20", "remaining_time": "0:04:25"} +{"current_steps": 480, "total_steps": 1360, "loss": 0.2619, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 3.681721262971413e-05, "epoch": 1.7647058823529411, "percentage": 35.29, "elapsed_time": "0:02:23", "remaining_time": "0:04:22"} +{"current_steps": 490, "total_steps": 1360, "loss": 0.237, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 3.6297507849998344e-05, "epoch": 1.8014705882352942, "percentage": 36.03, "elapsed_time": "0:02:25", "remaining_time": "0:04:19"} +{"current_steps": 500, "total_steps": 1360, "loss": 0.1281, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 3.5771593623524265e-05, "epoch": 1.8382352941176472, "percentage": 36.76, "elapsed_time": "0:02:28", "remaining_time": "0:04:15"} +{"current_steps": 500, "total_steps": 1360, "loss": null, "eval_loss": 0.19497708976268768, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": null, "epoch": 1.8382352941176472, "percentage": 36.76, "elapsed_time": "0:02:28", "remaining_time": "0:04:15"} +{"current_steps": 510, "total_steps": 1360, "loss": 0.1565, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 3.5239759008412666e-05, "epoch": 1.875, "percentage": 37.5, "elapsed_time": "0:02:33", "remaining_time": "0:04:15"} +{"current_steps": 520, "total_steps": 1360, "loss": 0.1924, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 3.4702296316806244e-05, "epoch": 1.9117647058823528, "percentage": 38.24, "elapsed_time": "0:02:35", "remaining_time": "0:04:11"} +{"current_steps": 530, "total_steps": 1360, "loss": 0.1904, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 3.415950095420616e-05, "epoch": 1.9485294117647058, "percentage": 38.97, "elapsed_time": "0:02:38", "remaining_time": "0:04:08"} +{"current_steps": 540, "total_steps": 1360, "loss": 0.1523, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 3.361167125710832e-05, "epoch": 1.9852941176470589, "percentage": 39.71, "elapsed_time": "0:02:41", "remaining_time": "0:04:05"} +{"current_steps": 550, "total_steps": 1360, "loss": 0.1413, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 3.305910832902884e-05, "epoch": 2.0220588235294117, "percentage": 40.44, "elapsed_time": "0:02:44", "remaining_time": "0:04:01"} +{"current_steps": 560, "total_steps": 1360, "loss": 0.1112, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 3.2502115875008524e-05, "epoch": 2.0588235294117645, "percentage": 41.18, "elapsed_time": "0:02:47", "remaining_time": "0:03:58"} +{"current_steps": 570, "total_steps": 1360, "loss": 0.0935, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 3.1941000034687515e-05, "epoch": 2.0955882352941178, "percentage": 41.91, "elapsed_time": "0:02:50", "remaining_time": "0:03:55"} +{"current_steps": 580, "total_steps": 1360, "loss": 0.1512, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 3.1376069214041913e-05, "epoch": 2.1323529411764706, "percentage": 42.65, "elapsed_time": "0:02:52", "remaining_time": "0:03:52"} +{"current_steps": 590, "total_steps": 1360, "loss": 0.1686, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 3.0807633915874584e-05, "epoch": 2.1691176470588234, "percentage": 43.38, "elapsed_time": "0:02:55", "remaining_time": "0:03:49"} +{"current_steps": 600, "total_steps": 1360, "loss": 0.173, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 3.0236006569153617e-05, "epoch": 2.2058823529411766, "percentage": 44.12, "elapsed_time": "0:02:58", "remaining_time": "0:03:46"} +{"current_steps": 600, "total_steps": 1360, "loss": null, "eval_loss": 0.2179846614599228, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": null, "epoch": 2.2058823529411766, "percentage": 44.12, "elapsed_time": "0:02:58", "remaining_time": "0:03:46"} +{"current_steps": 600, "total_steps": 1360, "loss": null, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": null, "epoch": 2.2058823529411766, "percentage": 44.12, "elapsed_time": "0:02:58", "remaining_time": "0:03:46"} +{"current_steps": 15, "total_steps": 15, "loss": null, "eval_loss": 0.18564413487911224, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": null, "epoch": 2.2058823529411766, "percentage": 100.0, "elapsed_time": "0:03:09", "remaining_time": "0:00:00"} diff --git a/llama2_13b_peft/winowhy/trainer_state.json b/llama2_13b_peft/winowhy/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..391edc4290881858610b268e64840ebd258bee6d --- /dev/null +++ b/llama2_13b_peft/winowhy/trainer_state.json @@ -0,0 +1,498 @@ +{ + "best_metric": 0.18564413487911224, + "best_model_checkpoint": "ckpt/llama2_13b_fuze27_no_sys/winowhy_no_sys/checkpoint-300", + "epoch": 2.2058823529411766, + "eval_steps": 100, + "global_step": 600, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.03676470588235294, + "grad_norm": 19.647098541259766, + "learning_rate": 2.5e-05, + "loss": 9.0197, + "step": 10 + }, + { + "epoch": 0.07352941176470588, + "grad_norm": 8.87275218963623, + "learning_rate": 5e-05, + "loss": 6.0796, + "step": 20 + }, + { + "epoch": 0.11029411764705882, + "grad_norm": 3.1659748554229736, + "learning_rate": 4.99931296277454e-05, + "loss": 0.7866, + "step": 30 + }, + { + "epoch": 0.14705882352941177, + "grad_norm": 2.318621873855591, + "learning_rate": 4.997252228714279e-05, + "loss": 0.3572, + "step": 40 + }, + { + "epoch": 0.18382352941176472, + "grad_norm": 4.025834560394287, + "learning_rate": 4.993818930460026e-05, + "loss": 0.3669, + "step": 50 + }, + { + "epoch": 0.22058823529411764, + "grad_norm": 6.20689058303833, + "learning_rate": 4.9890149550547454e-05, + "loss": 0.364, + "step": 60 + }, + { + "epoch": 0.25735294117647056, + "grad_norm": 1.2856365442276, + "learning_rate": 4.982842942906386e-05, + "loss": 0.3211, + "step": 70 + }, + { + "epoch": 0.29411764705882354, + "grad_norm": 2.2388663291931152, + "learning_rate": 4.9753062863366276e-05, + "loss": 0.3052, + "step": 80 + }, + { + "epoch": 0.33088235294117646, + "grad_norm": 4.5252156257629395, + "learning_rate": 4.966409127716367e-05, + "loss": 0.3446, + "step": 90 + }, + { + "epoch": 0.36764705882352944, + "grad_norm": 1.3918992280960083, + "learning_rate": 4.95615635718894e-05, + "loss": 0.3041, + "step": 100 + }, + { + "epoch": 0.36764705882352944, + "eval_loss": 0.3207797408103943, + "eval_runtime": 1.2144, + "eval_samples_per_second": 94.698, + "eval_steps_per_second": 12.352, + "step": 100 + }, + { + "epoch": 0.40441176470588236, + "grad_norm": 1.2850301265716553, + "learning_rate": 4.944553609982363e-05, + "loss": 0.2438, + "step": 110 + }, + { + "epoch": 0.4411764705882353, + "grad_norm": 2.6484286785125732, + "learning_rate": 4.931607263312032e-05, + "loss": 0.3688, + "step": 120 + }, + { + "epoch": 0.47794117647058826, + "grad_norm": 1.9035048484802246, + "learning_rate": 4.917324432875627e-05, + "loss": 0.2945, + "step": 130 + }, + { + "epoch": 0.5147058823529411, + "grad_norm": 1.4788445234298706, + "learning_rate": 4.9017129689421e-05, + "loss": 0.2764, + "step": 140 + }, + { + "epoch": 0.5514705882352942, + "grad_norm": 3.2268259525299072, + "learning_rate": 4.8847814520369475e-05, + "loss": 0.3507, + "step": 150 + }, + { + "epoch": 0.5882352941176471, + "grad_norm": 4.3639116287231445, + "learning_rate": 4.8665391882260856e-05, + "loss": 0.2592, + "step": 160 + }, + { + "epoch": 0.625, + "grad_norm": 1.9433594942092896, + "learning_rate": 4.846996204000967e-05, + "loss": 0.2624, + "step": 170 + }, + { + "epoch": 0.6617647058823529, + "grad_norm": 4.276350498199463, + "learning_rate": 4.8261632407677174e-05, + "loss": 0.373, + "step": 180 + }, + { + "epoch": 0.6985294117647058, + "grad_norm": 5.467511177062988, + "learning_rate": 4.804051748943343e-05, + "loss": 0.2961, + "step": 190 + }, + { + "epoch": 0.7352941176470589, + "grad_norm": 1.3400689363479614, + "learning_rate": 4.780673881662242e-05, + "loss": 0.2575, + "step": 200 + }, + { + "epoch": 0.7352941176470589, + "eval_loss": 0.22019265592098236, + "eval_runtime": 1.2151, + "eval_samples_per_second": 94.641, + "eval_steps_per_second": 12.345, + "step": 200 + }, + { + "epoch": 0.7720588235294118, + "grad_norm": 1.3730210065841675, + "learning_rate": 4.756042488096471e-05, + "loss": 0.2374, + "step": 210 + }, + { + "epoch": 0.8088235294117647, + "grad_norm": 1.551169753074646, + "learning_rate": 4.730171106393466e-05, + "loss": 0.2592, + "step": 220 + }, + { + "epoch": 0.8455882352941176, + "grad_norm": 1.8357897996902466, + "learning_rate": 4.7030739562350713e-05, + "loss": 0.2067, + "step": 230 + }, + { + "epoch": 0.8823529411764706, + "grad_norm": 1.9791502952575684, + "learning_rate": 4.674765931021976e-05, + "loss": 0.2181, + "step": 240 + }, + { + "epoch": 0.9191176470588235, + "grad_norm": 3.2839415073394775, + "learning_rate": 4.645262589687861e-05, + "loss": 0.2213, + "step": 250 + }, + { + "epoch": 0.9558823529411765, + "grad_norm": 6.267187118530273, + "learning_rate": 4.614580148147744e-05, + "loss": 0.2569, + "step": 260 + }, + { + "epoch": 0.9926470588235294, + "grad_norm": 0.45617279410362244, + "learning_rate": 4.582735470385229e-05, + "loss": 0.1528, + "step": 270 + }, + { + "epoch": 1.0294117647058822, + "grad_norm": 2.27298641204834, + "learning_rate": 4.5497460591835615e-05, + "loss": 0.263, + "step": 280 + }, + { + "epoch": 1.0661764705882353, + "grad_norm": 0.889145016670227, + "learning_rate": 4.515630046505575e-05, + "loss": 0.1773, + "step": 290 + }, + { + "epoch": 1.1029411764705883, + "grad_norm": 1.5614073276519775, + "learning_rate": 4.480406183527823e-05, + "loss": 0.2475, + "step": 300 + }, + { + "epoch": 1.1029411764705883, + "eval_loss": 0.18564413487911224, + "eval_runtime": 1.2151, + "eval_samples_per_second": 94.643, + "eval_steps_per_second": 12.345, + "step": 300 + }, + { + "epoch": 1.1397058823529411, + "grad_norm": 1.2656581401824951, + "learning_rate": 4.4440938303343804e-05, + "loss": 0.1195, + "step": 310 + }, + { + "epoch": 1.1764705882352942, + "grad_norm": 12.3343505859375, + "learning_rate": 4.406712945275955e-05, + "loss": 0.2049, + "step": 320 + }, + { + "epoch": 1.213235294117647, + "grad_norm": 3.2121803760528564, + "learning_rate": 4.368284074000193e-05, + "loss": 0.159, + "step": 330 + }, + { + "epoch": 1.25, + "grad_norm": 2.942842960357666, + "learning_rate": 4.328828338159173e-05, + "loss": 0.2263, + "step": 340 + }, + { + "epoch": 1.2867647058823528, + "grad_norm": 2.7704460620880127, + "learning_rate": 4.288367423800319e-05, + "loss": 0.2505, + "step": 350 + }, + { + "epoch": 1.3235294117647058, + "grad_norm": 2.340487241744995, + "learning_rate": 4.2469235694471043e-05, + "loss": 0.1879, + "step": 360 + }, + { + "epoch": 1.3602941176470589, + "grad_norm": 1.4567615985870361, + "learning_rate": 4.204519553876095e-05, + "loss": 0.1437, + "step": 370 + }, + { + "epoch": 1.3970588235294117, + "grad_norm": 1.2511286735534668, + "learning_rate": 4.161178683597054e-05, + "loss": 0.18, + "step": 380 + }, + { + "epoch": 1.4338235294117647, + "grad_norm": 1.8652863502502441, + "learning_rate": 4.116924780042997e-05, + "loss": 0.2727, + "step": 390 + }, + { + "epoch": 1.4705882352941178, + "grad_norm": 1.7881311178207397, + "learning_rate": 4.071782166477213e-05, + "loss": 0.1415, + "step": 400 + }, + { + "epoch": 1.4705882352941178, + "eval_loss": 0.2149094045162201, + "eval_runtime": 1.2151, + "eval_samples_per_second": 94.639, + "eval_steps_per_second": 12.344, + "step": 400 + }, + { + "epoch": 1.5073529411764706, + "grad_norm": 0.5304898023605347, + "learning_rate": 4.0257756546244804e-05, + "loss": 0.1996, + "step": 410 + }, + { + "epoch": 1.5441176470588234, + "grad_norm": 3.2032785415649414, + "learning_rate": 3.978930531033807e-05, + "loss": 0.2265, + "step": 420 + }, + { + "epoch": 1.5808823529411766, + "grad_norm": 0.8113433718681335, + "learning_rate": 3.93127254318018e-05, + "loss": 0.1411, + "step": 430 + }, + { + "epoch": 1.6176470588235294, + "grad_norm": 0.9410021901130676, + "learning_rate": 3.882827885312999e-05, + "loss": 0.2201, + "step": 440 + }, + { + "epoch": 1.6544117647058822, + "grad_norm": 3.979304790496826, + "learning_rate": 3.833623184058926e-05, + "loss": 0.1829, + "step": 450 + }, + { + "epoch": 1.6911764705882353, + "grad_norm": 4.951131343841553, + "learning_rate": 3.783685483787105e-05, + "loss": 0.1413, + "step": 460 + }, + { + "epoch": 1.7279411764705883, + "grad_norm": 4.674718379974365, + "learning_rate": 3.7330422317447685e-05, + "loss": 0.1551, + "step": 470 + }, + { + "epoch": 1.7647058823529411, + "grad_norm": 2.091346025466919, + "learning_rate": 3.681721262971413e-05, + "loss": 0.2619, + "step": 480 + }, + { + "epoch": 1.8014705882352942, + "grad_norm": 2.1362998485565186, + "learning_rate": 3.6297507849998344e-05, + "loss": 0.237, + "step": 490 + }, + { + "epoch": 1.8382352941176472, + "grad_norm": 0.47038739919662476, + "learning_rate": 3.5771593623524265e-05, + "loss": 0.1281, + "step": 500 + }, + { + "epoch": 1.8382352941176472, + "eval_loss": 0.19497708976268768, + "eval_runtime": 1.2149, + "eval_samples_per_second": 94.658, + "eval_steps_per_second": 12.347, + "step": 500 + }, + { + "epoch": 1.875, + "grad_norm": 1.3052918910980225, + "learning_rate": 3.5239759008412666e-05, + "loss": 0.1565, + "step": 510 + }, + { + "epoch": 1.9117647058823528, + "grad_norm": 2.9952781200408936, + "learning_rate": 3.4702296316806244e-05, + "loss": 0.1924, + "step": 520 + }, + { + "epoch": 1.9485294117647058, + "grad_norm": 0.8276472091674805, + "learning_rate": 3.415950095420616e-05, + "loss": 0.1904, + "step": 530 + }, + { + "epoch": 1.9852941176470589, + "grad_norm": 2.209911823272705, + "learning_rate": 3.361167125710832e-05, + "loss": 0.1523, + "step": 540 + }, + { + "epoch": 2.0220588235294117, + "grad_norm": 1.9291682243347168, + "learning_rate": 3.305910832902884e-05, + "loss": 0.1413, + "step": 550 + }, + { + "epoch": 2.0588235294117645, + "grad_norm": 0.1702612340450287, + "learning_rate": 3.2502115875008524e-05, + "loss": 0.1112, + "step": 560 + }, + { + "epoch": 2.0955882352941178, + "grad_norm": 1.7829452753067017, + "learning_rate": 3.1941000034687515e-05, + "loss": 0.0935, + "step": 570 + }, + { + "epoch": 2.1323529411764706, + "grad_norm": 1.0806934833526611, + "learning_rate": 3.1376069214041913e-05, + "loss": 0.1512, + "step": 580 + }, + { + "epoch": 2.1691176470588234, + "grad_norm": 0.183503657579422, + "learning_rate": 3.0807633915874584e-05, + "loss": 0.1686, + "step": 590 + }, + { + "epoch": 2.2058823529411766, + "grad_norm": 3.425459146499634, + "learning_rate": 3.0236006569153617e-05, + "loss": 0.173, + "step": 600 + }, + { + "epoch": 2.2058823529411766, + "eval_loss": 0.2179846614599228, + "eval_runtime": 1.2136, + "eval_samples_per_second": 94.756, + "eval_steps_per_second": 12.359, + "step": 600 + }, + { + "epoch": 2.2058823529411766, + "step": 600, + "total_flos": 3.440278732852429e+16, + "train_loss": 0.47969158987204236, + "train_runtime": 190.8837, + "train_samples_per_second": 56.972, + "train_steps_per_second": 7.125 + } + ], + "logging_steps": 10, + "max_steps": 1360, + "num_input_tokens_seen": 0, + "num_train_epochs": 5, + "save_steps": 100, + "total_flos": 3.440278732852429e+16, + "train_batch_size": 4, + "trial_name": null, + "trial_params": null +} diff --git a/llama2_13b_peft/winowhy/training_args.bin b/llama2_13b_peft/winowhy/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..e0055507d12524612b0f7a35112c2feb696cebfb --- /dev/null +++ b/llama2_13b_peft/winowhy/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:75e78f9ac454afc1cb35d5cbbebb5101ebec0583fcd21eea8b49467fe7e3fcc7 +size 5176 diff --git a/llama2_13b_peft/winowhy/training_eval_loss.png b/llama2_13b_peft/winowhy/training_eval_loss.png new file mode 100644 index 0000000000000000000000000000000000000000..c1d2515250ae98a659503bd89fb823d7e6c1a8a9 Binary files /dev/null and b/llama2_13b_peft/winowhy/training_eval_loss.png differ diff --git a/llama2_13b_peft/winowhy/training_loss.png b/llama2_13b_peft/winowhy/training_loss.png new file mode 100644 index 0000000000000000000000000000000000000000..155177bbb5ad253adb93c159a1788b30278afc70 Binary files /dev/null and b/llama2_13b_peft/winowhy/training_loss.png differ