diff --git a/llama2_13b_peft/linguistics_puzzles/README.md b/llama2_13b_peft/linguistics_puzzles/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..e4229a8f14f3eede0b3158a1d42fafaa20d27975
--- /dev/null
+++ b/llama2_13b_peft/linguistics_puzzles/README.md
@@ -0,0 +1,74 @@
+---
+license: other
+library_name: peft
+tags:
+- llama-factory
+- lora
+- generated_from_trainer
+base_model: /data1/model/llama2/meta-llama/Llama2-13b
+model-index:
+- name: linguistics_puzzles_no_sys
+ results: []
+---
+
+
+
+# linguistics_puzzles_no_sys
+
+This model is a fine-tuned version of Llama2-13b (loaded from the local path `/data1/model/llama2/meta-llama/Llama2-13b`) on the linguistics_puzzles_no_sys dataset.
+It achieves the following results on the evaluation set:
+- Loss: 0.5924
+
+## Model description
+
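+A LoRA adapter (rank 8, alpha 16, dropout 0.0, no bias) for causal language modeling, trained with LLaMA-Factory and PEFT. It targets the `q_proj`, `k_proj`, `v_proj`, `o_proj`, `gate_proj`, `up_proj`, and `down_proj` modules of the base model.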
+
+## Intended uses & limitations
+
+More information needed
+
+## Training and evaluation data
+
+More information needed
+
+## Training procedure
+
+### Training hyperparameters
+
+The following hyperparameters were used during training:
+- learning_rate: 5e-05
+- train_batch_size: 4
+- eval_batch_size: 4
+- seed: 42
+- distributed_type: multi-GPU
+- num_devices: 2
+- total_train_batch_size: 8
+- total_eval_batch_size: 8
+- optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
+- lr_scheduler_type: cosine
+- lr_scheduler_warmup_steps: 20
+- num_epochs: 5.0
+
+### Training results
+
+| Training Loss | Epoch | Step | Validation Loss |
+|:-------------:|:------:|:----:|:---------------:|
+| 1.1276 | 0.5263 | 100 | 1.0876 |
+| 0.8128 | 1.0526 | 200 | 0.8153 |
+| 0.6705 | 1.5789 | 300 | 0.6892 |
+| 0.4876 | 2.1053 | 400 | 0.6225 |
+| 0.4435 | 2.6316 | 500 | 0.5924 |
+| 0.2743 | 3.1579 | 600 | 0.6151 |
+| 0.2846 | 3.6842 | 700 | 0.6084 |
+| 0.2069 | 4.2105 | 800 | 0.6427 |
+| 0.1720 | 4.7368 | 900 | 0.6495 |
+
+
+### Framework versions
+
+- PEFT 0.10.0
+- Transformers 4.40.0
+- Pytorch 2.2.1
+- Datasets 2.18.0
+- Tokenizers 0.19.1
\ No newline at end of file
diff --git a/llama2_13b_peft/linguistics_puzzles/adapter_config.json b/llama2_13b_peft/linguistics_puzzles/adapter_config.json
new file mode 100644
index 0000000000000000000000000000000000000000..801d71a4f86adde83f5360df8b9fd6798a9bbe77
--- /dev/null
+++ b/llama2_13b_peft/linguistics_puzzles/adapter_config.json
@@ -0,0 +1,34 @@
+{
+ "alpha_pattern": {},
+ "auto_mapping": null,
+ "base_model_name_or_path": "/data1/model/llama2/meta-llama/Llama2-13b",
+ "bias": "none",
+ "fan_in_fan_out": false,
+ "inference_mode": true,
+ "init_lora_weights": true,
+ "layer_replication": null,
+ "layers_pattern": null,
+ "layers_to_transform": null,
+ "loftq_config": {},
+ "lora_alpha": 16,
+ "lora_dropout": 0.0,
+ "megatron_config": null,
+ "megatron_core": "megatron.core",
+ "modules_to_save": null,
+ "peft_type": "LORA",
+ "r": 8,
+ "rank_pattern": {},
+ "revision": null,
+ "target_modules": [
+ "o_proj",
+ "k_proj",
+ "q_proj",
+ "up_proj",
+ "gate_proj",
+ "v_proj",
+ "down_proj"
+ ],
+ "task_type": "CAUSAL_LM",
+ "use_dora": false,
+ "use_rslora": false
+}
\ No newline at end of file
diff --git a/llama2_13b_peft/linguistics_puzzles/adapter_model.safetensors b/llama2_13b_peft/linguistics_puzzles/adapter_model.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..0805ca0f3c698ab99568958c6666457c03173fe1
--- /dev/null
+++ b/llama2_13b_peft/linguistics_puzzles/adapter_model.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ef6dff2a000b0e4ef6a4db9d169cbaa257c1423e45fae8ef4f428ba9852e00f5
+size 125248064
diff --git a/llama2_13b_peft/linguistics_puzzles/all_results.json b/llama2_13b_peft/linguistics_puzzles/all_results.json
new file mode 100644
index 0000000000000000000000000000000000000000..3622b5658b3efa01bbac08c815371d32e7529ee3
--- /dev/null
+++ b/llama2_13b_peft/linguistics_puzzles/all_results.json
@@ -0,0 +1,12 @@
+{
+ "epoch": 5.0,
+ "eval_loss": 0.5924356579780579,
+ "eval_runtime": 1.9025,
+ "eval_samples_per_second": 42.05,
+ "eval_steps_per_second": 5.256,
+ "total_flos": 2.0275085174217114e+17,
+ "train_loss": 0.5822552880487945,
+ "train_runtime": 660.0352,
+ "train_samples_per_second": 11.515,
+ "train_steps_per_second": 1.439
+}
\ No newline at end of file
diff --git a/llama2_13b_peft/linguistics_puzzles/eval_results.json b/llama2_13b_peft/linguistics_puzzles/eval_results.json
new file mode 100644
index 0000000000000000000000000000000000000000..0b1113b01be64175547033f42dea6bd88c528961
--- /dev/null
+++ b/llama2_13b_peft/linguistics_puzzles/eval_results.json
@@ -0,0 +1,7 @@
+{
+ "epoch": 5.0,
+ "eval_loss": 0.5924356579780579,
+ "eval_runtime": 1.9025,
+ "eval_samples_per_second": 42.05,
+ "eval_steps_per_second": 5.256
+}
\ No newline at end of file
diff --git a/llama2_13b_peft/linguistics_puzzles/special_tokens_map.json b/llama2_13b_peft/linguistics_puzzles/special_tokens_map.json
new file mode 100644
index 0000000000000000000000000000000000000000..72ecfeeb7e14d244c936169d2ed139eeae235ef1
--- /dev/null
+++ b/llama2_13b_peft/linguistics_puzzles/special_tokens_map.json
@@ -0,0 +1,24 @@
+{
+ "bos_token": {
+ "content": "<s>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false
+ },
+ "eos_token": {
+ "content": "</s>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false
+ },
+ "pad_token": "",
+ "unk_token": {
+ "content": "<unk>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false
+ }
+}
diff --git a/llama2_13b_peft/linguistics_puzzles/tokenizer.model b/llama2_13b_peft/linguistics_puzzles/tokenizer.model
new file mode 100644
index 0000000000000000000000000000000000000000..6c00c742ce03c627d6cd5b795984876fa49fa899
--- /dev/null
+++ b/llama2_13b_peft/linguistics_puzzles/tokenizer.model
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9e556afd44213b6bd1be2b850ebbbd98f5481437a8021afaf58ee7fb1818d347
+size 499723
diff --git a/llama2_13b_peft/linguistics_puzzles/tokenizer_config.json b/llama2_13b_peft/linguistics_puzzles/tokenizer_config.json
new file mode 100644
index 0000000000000000000000000000000000000000..a40266f39e5b5fed14de34710d35eb9e98d6bdad
--- /dev/null
+++ b/llama2_13b_peft/linguistics_puzzles/tokenizer_config.json
@@ -0,0 +1,45 @@
+{
+ "add_bos_token": true,
+ "add_eos_token": false,
+ "add_prefix_space": true,
+ "added_tokens_decoder": {
+ "0": {
+ "content": "<unk>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "1": {
+ "content": "<s>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "2": {
+ "content": "</s>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ }
+ },
+ "bos_token": "<s>",
+ "chat_template": "{% if messages[0]['role'] == 'system' %}{% set system_message = messages[0]['content'] %}{% endif %}{% if system_message is defined %}{{ system_message }}{% endif %}{% for message in messages %}{% set content = message['content'] %}{% if message['role'] == 'user' %}{{ content }}{% elif message['role'] == 'assistant' %}{{ content + '\\n' }}{% endif %}{% endfor %}",
+ "clean_up_tokenization_spaces": false,
+ "eos_token": "</s>",
+ "legacy": true,
+ "model_max_length": 1000000000000000019884624838656,
+ "pad_token": "",
+ "padding_side": "right",
+ "sp_model_kwargs": {},
+ "spaces_between_special_tokens": false,
+ "split_special_tokens": false,
+ "tokenizer_class": "LlamaTokenizer",
+ "unk_token": "<unk>",
+ "use_default_system_prompt": false
+}
diff --git a/llama2_13b_peft/linguistics_puzzles/train_results.json b/llama2_13b_peft/linguistics_puzzles/train_results.json
new file mode 100644
index 0000000000000000000000000000000000000000..1a7f209917b8fb4dcd40550c1d8743c75464bafb
--- /dev/null
+++ b/llama2_13b_peft/linguistics_puzzles/train_results.json
@@ -0,0 +1,8 @@
+{
+ "epoch": 5.0,
+ "total_flos": 2.0275085174217114e+17,
+ "train_loss": 0.5822552880487945,
+ "train_runtime": 660.0352,
+ "train_samples_per_second": 11.515,
+ "train_steps_per_second": 1.439
+}
\ No newline at end of file
diff --git a/llama2_13b_peft/linguistics_puzzles/trainer_log.jsonl b/llama2_13b_peft/linguistics_puzzles/trainer_log.jsonl
new file mode 100644
index 0000000000000000000000000000000000000000..acf3bc53f4746cba805944b915da98baa17f8f2a
--- /dev/null
+++ b/llama2_13b_peft/linguistics_puzzles/trainer_log.jsonl
@@ -0,0 +1,106 @@
+{"current_steps": 10, "total_steps": 950, "loss": 2.5922, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 2.5e-05, "epoch": 0.05263157894736842, "percentage": 1.05, "elapsed_time": "0:00:07", "remaining_time": "0:12:31"}
+{"current_steps": 20, "total_steps": 950, "loss": 2.3206, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 5e-05, "epoch": 0.10526315789473684, "percentage": 2.11, "elapsed_time": "0:00:14", "remaining_time": "0:11:00"}
+{"current_steps": 30, "total_steps": 950, "loss": 1.7229, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.998573727324295e-05, "epoch": 0.15789473684210525, "percentage": 3.16, "elapsed_time": "0:00:20", "remaining_time": "0:10:32"}
+{"current_steps": 40, "total_steps": 950, "loss": 1.3729, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.994296536700177e-05, "epoch": 0.21052631578947367, "percentage": 4.21, "elapsed_time": "0:00:26", "remaining_time": "0:10:14"}
+{"current_steps": 50, "total_steps": 950, "loss": 1.3635, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.987173308479738e-05, "epoch": 0.2631578947368421, "percentage": 5.26, "elapsed_time": "0:00:33", "remaining_time": "0:10:10"}
+{"current_steps": 60, "total_steps": 950, "loss": 1.3315, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.977212170395598e-05, "epoch": 0.3157894736842105, "percentage": 6.32, "elapsed_time": "0:00:40", "remaining_time": "0:10:00"}
+{"current_steps": 70, "total_steps": 950, "loss": 1.2515, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.964424488287009e-05, "epoch": 0.3684210526315789, "percentage": 7.37, "elapsed_time": "0:00:46", "remaining_time": "0:09:49"}
+{"current_steps": 80, "total_steps": 950, "loss": 1.1872, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.948824853131236e-05, "epoch": 0.42105263157894735, "percentage": 8.42, "elapsed_time": "0:00:53", "remaining_time": "0:09:43"}
+{"current_steps": 90, "total_steps": 950, "loss": 1.1552, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.930431064394977e-05, "epoch": 0.47368421052631576, "percentage": 9.47, "elapsed_time": "0:01:00", "remaining_time": "0:09:34"}
+{"current_steps": 100, "total_steps": 950, "loss": 1.1276, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.909264109724853e-05, "epoch": 0.5263157894736842, "percentage": 10.53, "elapsed_time": "0:01:06", "remaining_time": "0:09:27"}
+{"current_steps": 100, "total_steps": 950, "loss": null, "eval_loss": 1.0876480340957642, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": null, "epoch": 0.5263157894736842, "percentage": 10.53, "elapsed_time": "0:01:06", "remaining_time": "0:09:27"}
+{"current_steps": 110, "total_steps": 950, "loss": 1.1756, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.885348141000122e-05, "epoch": 0.5789473684210527, "percentage": 11.58, "elapsed_time": "0:01:15", "remaining_time": "0:09:37"}
+{"current_steps": 120, "total_steps": 950, "loss": 1.1106, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.858710446774951e-05, "epoch": 0.631578947368421, "percentage": 12.63, "elapsed_time": "0:01:22", "remaining_time": "0:09:29"}
+{"current_steps": 130, "total_steps": 950, "loss": 1.0175, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.829381421141671e-05, "epoch": 0.6842105263157895, "percentage": 13.68, "elapsed_time": "0:01:28", "remaining_time": "0:09:19"}
+{"current_steps": 140, "total_steps": 950, "loss": 0.9733, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.7973945290505766e-05, "epoch": 0.7368421052631579, "percentage": 14.74, "elapsed_time": "0:01:35", "remaining_time": "0:09:10"}
+{"current_steps": 150, "total_steps": 950, "loss": 0.9907, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.7627862681258037e-05, "epoch": 0.7894736842105263, "percentage": 15.79, "elapsed_time": "0:01:41", "remaining_time": "0:09:03"}
+{"current_steps": 160, "total_steps": 950, "loss": 0.9312, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.725596127020879e-05, "epoch": 0.8421052631578947, "percentage": 16.84, "elapsed_time": "0:01:48", "remaining_time": "0:08:54"}
+{"current_steps": 170, "total_steps": 950, "loss": 0.9586, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.685866540361456e-05, "epoch": 0.8947368421052632, "percentage": 17.89, "elapsed_time": "0:01:55", "remaining_time": "0:08:47"}
+{"current_steps": 180, "total_steps": 950, "loss": 0.9595, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.643642840326627e-05, "epoch": 0.9473684210526315, "percentage": 18.95, "elapsed_time": "0:02:01", "remaining_time": "0:08:39"}
+{"current_steps": 190, "total_steps": 950, "loss": 0.8331, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.598973204924097e-05, "epoch": 1.0, "percentage": 20.0, "elapsed_time": "0:02:08", "remaining_time": "0:08:32"}
+{"current_steps": 200, "total_steps": 950, "loss": 0.8128, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.551908603018191e-05, "epoch": 1.0526315789473684, "percentage": 21.05, "elapsed_time": "0:02:14", "remaining_time": "0:08:25"}
+{"current_steps": 200, "total_steps": 950, "loss": null, "eval_loss": 0.8153461217880249, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": null, "epoch": 1.0526315789473684, "percentage": 21.05, "elapsed_time": "0:02:14", "remaining_time": "0:08:25"}
+{"current_steps": 210, "total_steps": 950, "loss": 0.8186, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.502502736173462e-05, "epoch": 1.1052631578947367, "percentage": 22.11, "elapsed_time": "0:02:23", "remaining_time": "0:08:26"}
+{"current_steps": 220, "total_steps": 950, "loss": 0.6895, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.45081197738023e-05, "epoch": 1.1578947368421053, "percentage": 23.16, "elapsed_time": "0:02:30", "remaining_time": "0:08:18"}
+{"current_steps": 230, "total_steps": 950, "loss": 0.7901, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.3968953067319777e-05, "epoch": 1.2105263157894737, "percentage": 24.21, "elapsed_time": "0:02:36", "remaining_time": "0:08:10"}
+{"current_steps": 240, "total_steps": 950, "loss": 0.704, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.340814244127993e-05, "epoch": 1.263157894736842, "percentage": 25.26, "elapsed_time": "0:02:43", "remaining_time": "0:08:03"}
+{"current_steps": 250, "total_steps": 950, "loss": 0.6879, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.282632779078051e-05, "epoch": 1.3157894736842106, "percentage": 26.32, "elapsed_time": "0:02:49", "remaining_time": "0:07:55"}
+{"current_steps": 260, "total_steps": 950, "loss": 0.7563, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.222417297689217e-05, "epoch": 1.368421052631579, "percentage": 27.37, "elapsed_time": "0:02:56", "remaining_time": "0:07:48"}
+{"current_steps": 270, "total_steps": 950, "loss": 0.6846, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.160236506918098e-05, "epoch": 1.4210526315789473, "percentage": 28.42, "elapsed_time": "0:03:03", "remaining_time": "0:07:41"}
+{"current_steps": 280, "total_steps": 950, "loss": 0.7155, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.096161356174959e-05, "epoch": 1.4736842105263157, "percentage": 29.47, "elapsed_time": "0:03:09", "remaining_time": "0:07:34"}
+{"current_steps": 290, "total_steps": 950, "loss": 0.8037, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.030264956369157e-05, "epoch": 1.526315789473684, "percentage": 30.53, "elapsed_time": "0:03:16", "remaining_time": "0:07:26"}
+{"current_steps": 300, "total_steps": 950, "loss": 0.6705, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 3.962622496488269e-05, "epoch": 1.5789473684210527, "percentage": 31.58, "elapsed_time": "0:03:22", "remaining_time": "0:07:19"}
+{"current_steps": 300, "total_steps": 950, "loss": null, "eval_loss": 0.6891714930534363, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": null, "epoch": 1.5789473684210527, "percentage": 31.58, "elapsed_time": "0:03:22", "remaining_time": "0:07:19"}
+{"current_steps": 310, "total_steps": 950, "loss": 0.6389, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 3.893311157806091e-05, "epoch": 1.631578947368421, "percentage": 32.63, "elapsed_time": "0:03:31", "remaining_time": "0:07:17"}
+{"current_steps": 320, "total_steps": 950, "loss": 0.7223, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 3.822410025817406e-05, "epoch": 1.6842105263157894, "percentage": 33.68, "elapsed_time": "0:03:38", "remaining_time": "0:07:10"}
+{"current_steps": 330, "total_steps": 950, "loss": 0.6948, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 3.7500000000000003e-05, "epoch": 1.736842105263158, "percentage": 34.74, "elapsed_time": "0:03:45", "remaining_time": "0:07:03"}
+{"current_steps": 340, "total_steps": 950, "loss": 0.6658, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 3.67616370150689e-05, "epoch": 1.7894736842105263, "percentage": 35.79, "elapsed_time": "0:03:51", "remaining_time": "0:06:55"}
+{"current_steps": 350, "total_steps": 950, "loss": 0.643, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 3.600985378894086e-05, "epoch": 1.8421052631578947, "percentage": 36.84, "elapsed_time": "0:03:58", "remaining_time": "0:06:48"}
+{"current_steps": 360, "total_steps": 950, "loss": 0.6537, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 3.5245508119914687e-05, "epoch": 1.8947368421052633, "percentage": 37.89, "elapsed_time": "0:04:05", "remaining_time": "0:06:41"}
+{"current_steps": 370, "total_steps": 950, "loss": 0.641, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 3.44694721402644e-05, "epoch": 1.9473684210526314, "percentage": 38.95, "elapsed_time": "0:04:12", "remaining_time": "0:06:35"}
+{"current_steps": 380, "total_steps": 950, "loss": 0.6708, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 3.3682631321120504e-05, "epoch": 2.0, "percentage": 40.0, "elapsed_time": "0:04:18", "remaining_time": "0:06:27"}
+{"current_steps": 390, "total_steps": 950, "loss": 0.5061, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 3.2885883462131394e-05, "epoch": 2.0526315789473686, "percentage": 41.05, "elapsed_time": "0:04:24", "remaining_time": "0:06:20"}
+{"current_steps": 400, "total_steps": 950, "loss": 0.4876, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 3.2080137667057595e-05, "epoch": 2.1052631578947367, "percentage": 42.11, "elapsed_time": "0:04:31", "remaining_time": "0:06:13"}
+{"current_steps": 400, "total_steps": 950, "loss": null, "eval_loss": 0.6224929690361023, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": null, "epoch": 2.1052631578947367, "percentage": 42.11, "elapsed_time": "0:04:31", "remaining_time": "0:06:13"}
+{"current_steps": 410, "total_steps": 950, "loss": 0.485, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 3.126631330646802e-05, "epoch": 2.1578947368421053, "percentage": 43.16, "elapsed_time": "0:04:40", "remaining_time": "0:06:09"}
+{"current_steps": 420, "total_steps": 950, "loss": 0.536, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 3.0445338968721287e-05, "epoch": 2.2105263157894735, "percentage": 44.21, "elapsed_time": "0:04:47", "remaining_time": "0:06:02"}
+{"current_steps": 430, "total_steps": 950, "loss": 0.4493, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 2.961815140042974e-05, "epoch": 2.263157894736842, "percentage": 45.26, "elapsed_time": "0:04:53", "remaining_time": "0:05:55"}
+{"current_steps": 440, "total_steps": 950, "loss": 0.4806, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 2.878569443761442e-05, "epoch": 2.3157894736842106, "percentage": 46.32, "elapsed_time": "0:05:00", "remaining_time": "0:05:48"}
+{"current_steps": 450, "total_steps": 950, "loss": 0.4642, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 2.7948917928771158e-05, "epoch": 2.3684210526315788, "percentage": 47.37, "elapsed_time": "0:05:07", "remaining_time": "0:05:41"}
+{"current_steps": 460, "total_steps": 950, "loss": 0.4857, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 2.7108776651076118e-05, "epoch": 2.4210526315789473, "percentage": 48.42, "elapsed_time": "0:05:13", "remaining_time": "0:05:34"}
+{"current_steps": 470, "total_steps": 950, "loss": 0.4604, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 2.6266229220967818e-05, "epoch": 2.473684210526316, "percentage": 49.47, "elapsed_time": "0:05:20", "remaining_time": "0:05:27"}
+{"current_steps": 480, "total_steps": 950, "loss": 0.4294, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 2.5422237000348276e-05, "epoch": 2.526315789473684, "percentage": 50.53, "elapsed_time": "0:05:27", "remaining_time": "0:05:20"}
+{"current_steps": 490, "total_steps": 950, "loss": 0.436, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 2.4577762999651726e-05, "epoch": 2.5789473684210527, "percentage": 51.58, "elapsed_time": "0:05:34", "remaining_time": "0:05:13"}
+{"current_steps": 500, "total_steps": 950, "loss": 0.4435, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 2.3733770779032184e-05, "epoch": 2.6315789473684212, "percentage": 52.63, "elapsed_time": "0:05:40", "remaining_time": "0:05:06"}
+{"current_steps": 500, "total_steps": 950, "loss": null, "eval_loss": 0.5924356579780579, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": null, "epoch": 2.6315789473684212, "percentage": 52.63, "elapsed_time": "0:05:40", "remaining_time": "0:05:06"}
+{"current_steps": 510, "total_steps": 950, "loss": 0.4128, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 2.2891223348923884e-05, "epoch": 2.6842105263157894, "percentage": 53.68, "elapsed_time": "0:05:49", "remaining_time": "0:05:01"}
+{"current_steps": 520, "total_steps": 950, "loss": 0.4201, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 2.2051082071228854e-05, "epoch": 2.736842105263158, "percentage": 54.74, "elapsed_time": "0:05:55", "remaining_time": "0:04:54"}
+{"current_steps": 530, "total_steps": 950, "loss": 0.4144, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 2.1214305562385592e-05, "epoch": 2.7894736842105265, "percentage": 55.79, "elapsed_time": "0:06:02", "remaining_time": "0:04:47"}
+{"current_steps": 540, "total_steps": 950, "loss": 0.4325, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 2.0381848599570276e-05, "epoch": 2.8421052631578947, "percentage": 56.84, "elapsed_time": "0:06:09", "remaining_time": "0:04:40"}
+{"current_steps": 550, "total_steps": 950, "loss": 0.4539, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 1.9554661031278712e-05, "epoch": 2.8947368421052633, "percentage": 57.89, "elapsed_time": "0:06:15", "remaining_time": "0:04:33"}
+{"current_steps": 560, "total_steps": 950, "loss": 0.3898, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 1.8733686693531985e-05, "epoch": 2.9473684210526314, "percentage": 58.95, "elapsed_time": "0:06:22", "remaining_time": "0:04:26"}
+{"current_steps": 570, "total_steps": 950, "loss": 0.4347, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 1.79198623329424e-05, "epoch": 3.0, "percentage": 60.0, "elapsed_time": "0:06:28", "remaining_time": "0:04:19"}
+{"current_steps": 580, "total_steps": 950, "loss": 0.2771, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 1.711411653786861e-05, "epoch": 3.0526315789473686, "percentage": 61.05, "elapsed_time": "0:06:35", "remaining_time": "0:04:12"}
+{"current_steps": 590, "total_steps": 950, "loss": 0.2786, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 1.6317368678879495e-05, "epoch": 3.1052631578947367, "percentage": 62.11, "elapsed_time": "0:06:41", "remaining_time": "0:04:05"}
+{"current_steps": 600, "total_steps": 950, "loss": 0.2743, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 1.55305278597356e-05, "epoch": 3.1578947368421053, "percentage": 63.16, "elapsed_time": "0:06:48", "remaining_time": "0:03:58"}
+{"current_steps": 600, "total_steps": 950, "loss": null, "eval_loss": 0.6151354908943176, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": null, "epoch": 3.1578947368421053, "percentage": 63.16, "elapsed_time": "0:06:48", "remaining_time": "0:03:58"}
+{"current_steps": 610, "total_steps": 950, "loss": 0.2611, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 1.475449188008532e-05, "epoch": 3.2105263157894735, "percentage": 64.21, "elapsed_time": "0:06:57", "remaining_time": "0:03:52"}
+{"current_steps": 620, "total_steps": 950, "loss": 0.237, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 1.399014621105914e-05, "epoch": 3.263157894736842, "percentage": 65.26, "elapsed_time": "0:07:03", "remaining_time": "0:03:45"}
+{"current_steps": 630, "total_steps": 950, "loss": 0.2319, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 1.3238362984931113e-05, "epoch": 3.3157894736842106, "percentage": 66.32, "elapsed_time": "0:07:10", "remaining_time": "0:03:38"}
+{"current_steps": 640, "total_steps": 950, "loss": 0.2785, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 1.2500000000000006e-05, "epoch": 3.3684210526315788, "percentage": 67.37, "elapsed_time": "0:07:17", "remaining_time": "0:03:31"}
+{"current_steps": 650, "total_steps": 950, "loss": 0.3323, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 1.1775899741825947e-05, "epoch": 3.4210526315789473, "percentage": 68.42, "elapsed_time": "0:07:23", "remaining_time": "0:03:24"}
+{"current_steps": 660, "total_steps": 950, "loss": 0.2762, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 1.1066888421939093e-05, "epoch": 3.473684210526316, "percentage": 69.47, "elapsed_time": "0:07:30", "remaining_time": "0:03:17"}
+{"current_steps": 670, "total_steps": 950, "loss": 0.2982, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 1.0373775035117305e-05, "epoch": 3.526315789473684, "percentage": 70.53, "elapsed_time": "0:07:37", "remaining_time": "0:03:11"}
+{"current_steps": 680, "total_steps": 950, "loss": 0.2338, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 9.697350436308427e-06, "epoch": 3.5789473684210527, "percentage": 71.58, "elapsed_time": "0:07:43", "remaining_time": "0:03:04"}
+{"current_steps": 690, "total_steps": 950, "loss": 0.2962, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 9.038386438250415e-06, "epoch": 3.6315789473684212, "percentage": 72.63, "elapsed_time": "0:07:50", "remaining_time": "0:02:57"}
+{"current_steps": 700, "total_steps": 950, "loss": 0.2846, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 8.397634930819021e-06, "epoch": 3.6842105263157894, "percentage": 73.68, "elapsed_time": "0:07:57", "remaining_time": "0:02:50"}
+{"current_steps": 700, "total_steps": 950, "loss": null, "eval_loss": 0.6083844900131226, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": null, "epoch": 3.6842105263157894, "percentage": 73.68, "elapsed_time": "0:07:57", "remaining_time": "0:02:50"}
+{"current_steps": 710, "total_steps": 950, "loss": 0.2895, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 7.775827023107835e-06, "epoch": 3.736842105263158, "percentage": 74.74, "elapsed_time": "0:08:06", "remaining_time": "0:02:44"}
+{"current_steps": 720, "total_steps": 950, "loss": 0.3261, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 7.173672209219495e-06, "epoch": 3.7894736842105265, "percentage": 75.79, "elapsed_time": "0:08:13", "remaining_time": "0:02:37"}
+{"current_steps": 730, "total_steps": 950, "loss": 0.2358, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 6.591857558720071e-06, "epoch": 3.8421052631578947, "percentage": 76.84, "elapsed_time": "0:08:20", "remaining_time": "0:02:30"}
+{"current_steps": 740, "total_steps": 950, "loss": 0.2723, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 6.031046932680229e-06, "epoch": 3.8947368421052633, "percentage": 77.89, "elapsed_time": "0:08:26", "remaining_time": "0:02:23"}
+{"current_steps": 750, "total_steps": 950, "loss": 0.2941, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 5.491880226197707e-06, "epoch": 3.9473684210526314, "percentage": 78.95, "elapsed_time": "0:08:33", "remaining_time": "0:02:16"}
+{"current_steps": 760, "total_steps": 950, "loss": 0.2721, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.9749726382653905e-06, "epoch": 4.0, "percentage": 80.0, "elapsed_time": "0:08:39", "remaining_time": "0:02:09"}
+{"current_steps": 770, "total_steps": 950, "loss": 0.1677, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.480913969818098e-06, "epoch": 4.052631578947368, "percentage": 81.05, "elapsed_time": "0:08:46", "remaining_time": "0:02:03"}
+{"current_steps": 780, "total_steps": 950, "loss": 0.2291, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.010267950759025e-06, "epoch": 4.105263157894737, "percentage": 82.11, "elapsed_time": "0:08:53", "remaining_time": "0:01:56"}
+{"current_steps": 790, "total_steps": 950, "loss": 0.1991, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 3.5635715967337223e-06, "epoch": 4.157894736842105, "percentage": 83.16, "elapsed_time": "0:09:00", "remaining_time": "0:01:49"}
+{"current_steps": 800, "total_steps": 950, "loss": 0.2069, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 3.141334596385448e-06, "epoch": 4.2105263157894735, "percentage": 84.21, "elapsed_time": "0:09:06", "remaining_time": "0:01:42"}
+{"current_steps": 800, "total_steps": 950, "loss": null, "eval_loss": 0.6427180767059326, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": null, "epoch": 4.2105263157894735, "percentage": 84.21, "elapsed_time": "0:09:06", "remaining_time": "0:01:42"}
+{"current_steps": 810, "total_steps": 950, "loss": 0.2213, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 2.7440387297912123e-06, "epoch": 4.2631578947368425, "percentage": 85.26, "elapsed_time": "0:09:15", "remaining_time": "0:01:36"}
+{"current_steps": 820, "total_steps": 950, "loss": 0.2008, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 2.372137318741968e-06, "epoch": 4.315789473684211, "percentage": 86.32, "elapsed_time": "0:09:22", "remaining_time": "0:01:29"}
+{"current_steps": 830, "total_steps": 950, "loss": 0.2178, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 2.026054709494235e-06, "epoch": 4.368421052631579, "percentage": 87.37, "elapsed_time": "0:09:28", "remaining_time": "0:01:22"}
+{"current_steps": 840, "total_steps": 950, "loss": 0.1878, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 1.7061857885832893e-06, "epoch": 4.421052631578947, "percentage": 88.42, "elapsed_time": "0:09:35", "remaining_time": "0:01:15"}
+{"current_steps": 850, "total_steps": 950, "loss": 0.1733, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 1.4128955322504966e-06, "epoch": 4.473684210526316, "percentage": 89.47, "elapsed_time": "0:09:41", "remaining_time": "0:01:08"}
+{"current_steps": 860, "total_steps": 950, "loss": 0.193, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 1.1465185899987797e-06, "epoch": 4.526315789473684, "percentage": 90.53, "elapsed_time": "0:09:48", "remaining_time": "0:01:01"}
+{"current_steps": 870, "total_steps": 950, "loss": 0.1802, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 9.073589027514789e-07, "epoch": 4.578947368421053, "percentage": 91.58, "elapsed_time": "0:09:55", "remaining_time": "0:00:54"}
+{"current_steps": 880, "total_steps": 950, "loss": 0.1736, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 6.956893560502359e-07, "epoch": 4.631578947368421, "percentage": 92.63, "elapsed_time": "0:10:01", "remaining_time": "0:00:47"}
+{"current_steps": 890, "total_steps": 950, "loss": 0.1761, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 5.117514686876379e-07, "epoch": 4.684210526315789, "percentage": 93.68, "elapsed_time": "0:10:08", "remaining_time": "0:00:41"}
+{"current_steps": 900, "total_steps": 950, "loss": 0.172, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 3.557551171299051e-07, "epoch": 4.7368421052631575, "percentage": 94.74, "elapsed_time": "0:10:14", "remaining_time": "0:00:34"}
+{"current_steps": 900, "total_steps": 950, "loss": null, "eval_loss": 0.6494551301002502, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": null, "epoch": 4.7368421052631575, "percentage": 94.74, "elapsed_time": "0:10:14", "remaining_time": "0:00:34"}
+{"current_steps": 910, "total_steps": 950, "loss": 0.1734, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 2.27878296044029e-07, "epoch": 4.7894736842105265, "percentage": 95.79, "elapsed_time": "0:10:23", "remaining_time": "0:00:27"}
+{"current_steps": 920, "total_steps": 950, "loss": 0.1954, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 1.2826691520262114e-07, "epoch": 4.842105263157895, "percentage": 96.84, "elapsed_time": "0:10:30", "remaining_time": "0:00:20"}
+{"current_steps": 930, "total_steps": 950, "loss": 0.1744, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 5.7034632998231865e-08, "epoch": 4.894736842105263, "percentage": 97.89, "elapsed_time": "0:10:37", "remaining_time": "0:00:13"}
+{"current_steps": 940, "total_steps": 950, "loss": 0.1778, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 1.4262726757049982e-08, "epoch": 4.947368421052632, "percentage": 98.95, "elapsed_time": "0:10:43", "remaining_time": "0:00:06"}
+{"current_steps": 950, "total_steps": 950, "loss": 0.1836, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 0.0, "epoch": 5.0, "percentage": 100.0, "elapsed_time": "0:10:50", "remaining_time": "0:00:00"}
+{"current_steps": 950, "total_steps": 950, "loss": null, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": null, "epoch": 5.0, "percentage": 100.0, "elapsed_time": "0:10:50", "remaining_time": "0:00:00"}
+{"current_steps": 10, "total_steps": 10, "loss": null, "eval_loss": 0.5924356579780579, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": null, "epoch": 5.0, "percentage": 100.0, "elapsed_time": "0:11:00", "remaining_time": "0:00:00"}
diff --git a/llama2_13b_peft/linguistics_puzzles/trainer_state.json b/llama2_13b_peft/linguistics_puzzles/trainer_state.json
new file mode 100644
index 0000000000000000000000000000000000000000..8d2aba5a478caae31e9b9f8f309e413aebb9459f
--- /dev/null
+++ b/llama2_13b_peft/linguistics_puzzles/trainer_state.json
@@ -0,0 +1,767 @@
+{
+ "best_metric": 0.5924356579780579,
+ "best_model_checkpoint": "ckpt/llama2_13b_other/linguistics_puzzles_no_sys/checkpoint-500",
+ "epoch": 5.0,
+ "eval_steps": 100,
+ "global_step": 950,
+ "is_hyper_param_search": false,
+ "is_local_process_zero": true,
+ "is_world_process_zero": true,
+ "log_history": [
+ {
+ "epoch": 0.05263157894736842,
+ "grad_norm": 1.5050264596939087,
+ "learning_rate": 2.5e-05,
+ "loss": 2.5922,
+ "step": 10
+ },
+ {
+ "epoch": 0.10526315789473684,
+ "grad_norm": 1.5525988340377808,
+ "learning_rate": 5e-05,
+ "loss": 2.3206,
+ "step": 20
+ },
+ {
+ "epoch": 0.15789473684210525,
+ "grad_norm": 1.7404705286026,
+ "learning_rate": 4.998573727324295e-05,
+ "loss": 1.7229,
+ "step": 30
+ },
+ {
+ "epoch": 0.21052631578947367,
+ "grad_norm": 1.8962088823318481,
+ "learning_rate": 4.994296536700177e-05,
+ "loss": 1.3729,
+ "step": 40
+ },
+ {
+ "epoch": 0.2631578947368421,
+ "grad_norm": 1.776729941368103,
+ "learning_rate": 4.987173308479738e-05,
+ "loss": 1.3635,
+ "step": 50
+ },
+ {
+ "epoch": 0.3157894736842105,
+ "grad_norm": 11.020795822143555,
+ "learning_rate": 4.977212170395598e-05,
+ "loss": 1.3315,
+ "step": 60
+ },
+ {
+ "epoch": 0.3684210526315789,
+ "grad_norm": 2.192176580429077,
+ "learning_rate": 4.964424488287009e-05,
+ "loss": 1.2515,
+ "step": 70
+ },
+ {
+ "epoch": 0.42105263157894735,
+ "grad_norm": 2.4063496589660645,
+ "learning_rate": 4.948824853131236e-05,
+ "loss": 1.1872,
+ "step": 80
+ },
+ {
+ "epoch": 0.47368421052631576,
+ "grad_norm": 2.7862613201141357,
+ "learning_rate": 4.930431064394977e-05,
+ "loss": 1.1552,
+ "step": 90
+ },
+ {
+ "epoch": 0.5263157894736842,
+ "grad_norm": 3.5330026149749756,
+ "learning_rate": 4.909264109724853e-05,
+ "loss": 1.1276,
+ "step": 100
+ },
+ {
+ "epoch": 0.5263157894736842,
+ "eval_loss": 1.0876480340957642,
+ "eval_runtime": 1.9022,
+ "eval_samples_per_second": 42.057,
+ "eval_steps_per_second": 5.257,
+ "step": 100
+ },
+ {
+ "epoch": 0.5789473684210527,
+ "grad_norm": 2.4774415493011475,
+ "learning_rate": 4.885348141000122e-05,
+ "loss": 1.1756,
+ "step": 110
+ },
+ {
+ "epoch": 0.631578947368421,
+ "grad_norm": 2.380500555038452,
+ "learning_rate": 4.858710446774951e-05,
+ "loss": 1.1106,
+ "step": 120
+ },
+ {
+ "epoch": 0.6842105263157895,
+ "grad_norm": 3.0656540393829346,
+ "learning_rate": 4.829381421141671e-05,
+ "loss": 1.0175,
+ "step": 130
+ },
+ {
+ "epoch": 0.7368421052631579,
+ "grad_norm": 5.256251811981201,
+ "learning_rate": 4.7973945290505766e-05,
+ "loss": 0.9733,
+ "step": 140
+ },
+ {
+ "epoch": 0.7894736842105263,
+ "grad_norm": 2.674135446548462,
+ "learning_rate": 4.7627862681258037e-05,
+ "loss": 0.9907,
+ "step": 150
+ },
+ {
+ "epoch": 0.8421052631578947,
+ "grad_norm": 3.5206069946289062,
+ "learning_rate": 4.725596127020879e-05,
+ "loss": 0.9312,
+ "step": 160
+ },
+ {
+ "epoch": 0.8947368421052632,
+ "grad_norm": 3.4086978435516357,
+ "learning_rate": 4.685866540361456e-05,
+ "loss": 0.9586,
+ "step": 170
+ },
+ {
+ "epoch": 0.9473684210526315,
+ "grad_norm": 4.591642379760742,
+ "learning_rate": 4.643642840326627e-05,
+ "loss": 0.9595,
+ "step": 180
+ },
+ {
+ "epoch": 1.0,
+ "grad_norm": 2.8823249340057373,
+ "learning_rate": 4.598973204924097e-05,
+ "loss": 0.8331,
+ "step": 190
+ },
+ {
+ "epoch": 1.0526315789473684,
+ "grad_norm": 3.7064428329467773,
+ "learning_rate": 4.551908603018191e-05,
+ "loss": 0.8128,
+ "step": 200
+ },
+ {
+ "epoch": 1.0526315789473684,
+ "eval_loss": 0.8153461217880249,
+ "eval_runtime": 1.9192,
+ "eval_samples_per_second": 41.684,
+ "eval_steps_per_second": 5.21,
+ "step": 200
+ },
+ {
+ "epoch": 1.1052631578947367,
+ "grad_norm": 4.2386274337768555,
+ "learning_rate": 4.502502736173462e-05,
+ "loss": 0.8186,
+ "step": 210
+ },
+ {
+ "epoch": 1.1578947368421053,
+ "grad_norm": 3.1767256259918213,
+ "learning_rate": 4.45081197738023e-05,
+ "loss": 0.6895,
+ "step": 220
+ },
+ {
+ "epoch": 1.2105263157894737,
+ "grad_norm": 3.748518466949463,
+ "learning_rate": 4.3968953067319777e-05,
+ "loss": 0.7901,
+ "step": 230
+ },
+ {
+ "epoch": 1.263157894736842,
+ "grad_norm": 3.807053565979004,
+ "learning_rate": 4.340814244127993e-05,
+ "loss": 0.704,
+ "step": 240
+ },
+ {
+ "epoch": 1.3157894736842106,
+ "grad_norm": 5.013542175292969,
+ "learning_rate": 4.282632779078051e-05,
+ "loss": 0.6879,
+ "step": 250
+ },
+ {
+ "epoch": 1.368421052631579,
+ "grad_norm": 4.752715110778809,
+ "learning_rate": 4.222417297689217e-05,
+ "loss": 0.7563,
+ "step": 260
+ },
+ {
+ "epoch": 1.4210526315789473,
+ "grad_norm": 3.6476950645446777,
+ "learning_rate": 4.160236506918098e-05,
+ "loss": 0.6846,
+ "step": 270
+ },
+ {
+ "epoch": 1.4736842105263157,
+ "grad_norm": 3.8758108615875244,
+ "learning_rate": 4.096161356174959e-05,
+ "loss": 0.7155,
+ "step": 280
+ },
+ {
+ "epoch": 1.526315789473684,
+ "grad_norm": 4.166601657867432,
+ "learning_rate": 4.030264956369157e-05,
+ "loss": 0.8037,
+ "step": 290
+ },
+ {
+ "epoch": 1.5789473684210527,
+ "grad_norm": 4.603171348571777,
+ "learning_rate": 3.962622496488269e-05,
+ "loss": 0.6705,
+ "step": 300
+ },
+ {
+ "epoch": 1.5789473684210527,
+ "eval_loss": 0.6891714930534363,
+ "eval_runtime": 1.9174,
+ "eval_samples_per_second": 41.724,
+ "eval_steps_per_second": 5.216,
+ "step": 300
+ },
+ {
+ "epoch": 1.631578947368421,
+ "grad_norm": 3.820142984390259,
+ "learning_rate": 3.893311157806091e-05,
+ "loss": 0.6389,
+ "step": 310
+ },
+ {
+ "epoch": 1.6842105263157894,
+ "grad_norm": 5.900814533233643,
+ "learning_rate": 3.822410025817406e-05,
+ "loss": 0.7223,
+ "step": 320
+ },
+ {
+ "epoch": 1.736842105263158,
+ "grad_norm": 4.315140724182129,
+ "learning_rate": 3.7500000000000003e-05,
+ "loss": 0.6948,
+ "step": 330
+ },
+ {
+ "epoch": 1.7894736842105263,
+ "grad_norm": 4.747324466705322,
+ "learning_rate": 3.67616370150689e-05,
+ "loss": 0.6658,
+ "step": 340
+ },
+ {
+ "epoch": 1.8421052631578947,
+ "grad_norm": 3.504014492034912,
+ "learning_rate": 3.600985378894086e-05,
+ "loss": 0.643,
+ "step": 350
+ },
+ {
+ "epoch": 1.8947368421052633,
+ "grad_norm": 5.181077480316162,
+ "learning_rate": 3.5245508119914687e-05,
+ "loss": 0.6537,
+ "step": 360
+ },
+ {
+ "epoch": 1.9473684210526314,
+ "grad_norm": 5.073149681091309,
+ "learning_rate": 3.44694721402644e-05,
+ "loss": 0.641,
+ "step": 370
+ },
+ {
+ "epoch": 2.0,
+ "grad_norm": 5.070895671844482,
+ "learning_rate": 3.3682631321120504e-05,
+ "loss": 0.6708,
+ "step": 380
+ },
+ {
+ "epoch": 2.0526315789473686,
+ "grad_norm": 5.305852890014648,
+ "learning_rate": 3.2885883462131394e-05,
+ "loss": 0.5061,
+ "step": 390
+ },
+ {
+ "epoch": 2.1052631578947367,
+ "grad_norm": 6.452213287353516,
+ "learning_rate": 3.2080137667057595e-05,
+ "loss": 0.4876,
+ "step": 400
+ },
+ {
+ "epoch": 2.1052631578947367,
+ "eval_loss": 0.6224929690361023,
+ "eval_runtime": 1.9167,
+ "eval_samples_per_second": 41.739,
+ "eval_steps_per_second": 5.217,
+ "step": 400
+ },
+ {
+ "epoch": 2.1578947368421053,
+ "grad_norm": 3.6080775260925293,
+ "learning_rate": 3.126631330646802e-05,
+ "loss": 0.485,
+ "step": 410
+ },
+ {
+ "epoch": 2.2105263157894735,
+ "grad_norm": 2.2630574703216553,
+ "learning_rate": 3.0445338968721287e-05,
+ "loss": 0.536,
+ "step": 420
+ },
+ {
+ "epoch": 2.263157894736842,
+ "grad_norm": 4.616273880004883,
+ "learning_rate": 2.961815140042974e-05,
+ "loss": 0.4493,
+ "step": 430
+ },
+ {
+ "epoch": 2.3157894736842106,
+ "grad_norm": 4.5297956466674805,
+ "learning_rate": 2.878569443761442e-05,
+ "loss": 0.4806,
+ "step": 440
+ },
+ {
+ "epoch": 2.3684210526315788,
+ "grad_norm": 4.910376071929932,
+ "learning_rate": 2.7948917928771158e-05,
+ "loss": 0.4642,
+ "step": 450
+ },
+ {
+ "epoch": 2.4210526315789473,
+ "grad_norm": 4.3276801109313965,
+ "learning_rate": 2.7108776651076118e-05,
+ "loss": 0.4857,
+ "step": 460
+ },
+ {
+ "epoch": 2.473684210526316,
+ "grad_norm": 3.657116413116455,
+ "learning_rate": 2.6266229220967818e-05,
+ "loss": 0.4604,
+ "step": 470
+ },
+ {
+ "epoch": 2.526315789473684,
+ "grad_norm": 4.7539896965026855,
+ "learning_rate": 2.5422237000348276e-05,
+ "loss": 0.4294,
+ "step": 480
+ },
+ {
+ "epoch": 2.5789473684210527,
+ "grad_norm": 4.227921962738037,
+ "learning_rate": 2.4577762999651726e-05,
+ "loss": 0.436,
+ "step": 490
+ },
+ {
+ "epoch": 2.6315789473684212,
+ "grad_norm": 6.821872234344482,
+ "learning_rate": 2.3733770779032184e-05,
+ "loss": 0.4435,
+ "step": 500
+ },
+ {
+ "epoch": 2.6315789473684212,
+ "eval_loss": 0.5924356579780579,
+ "eval_runtime": 1.9193,
+ "eval_samples_per_second": 41.683,
+ "eval_steps_per_second": 5.21,
+ "step": 500
+ },
+ {
+ "epoch": 2.6842105263157894,
+ "grad_norm": 4.023755073547363,
+ "learning_rate": 2.2891223348923884e-05,
+ "loss": 0.4128,
+ "step": 510
+ },
+ {
+ "epoch": 2.736842105263158,
+ "grad_norm": 4.245009899139404,
+ "learning_rate": 2.2051082071228854e-05,
+ "loss": 0.4201,
+ "step": 520
+ },
+ {
+ "epoch": 2.7894736842105265,
+ "grad_norm": 7.485212326049805,
+ "learning_rate": 2.1214305562385592e-05,
+ "loss": 0.4144,
+ "step": 530
+ },
+ {
+ "epoch": 2.8421052631578947,
+ "grad_norm": 3.890044689178467,
+ "learning_rate": 2.0381848599570276e-05,
+ "loss": 0.4325,
+ "step": 540
+ },
+ {
+ "epoch": 2.8947368421052633,
+ "grad_norm": 5.785126686096191,
+ "learning_rate": 1.9554661031278712e-05,
+ "loss": 0.4539,
+ "step": 550
+ },
+ {
+ "epoch": 2.9473684210526314,
+ "grad_norm": 3.959681272506714,
+ "learning_rate": 1.8733686693531985e-05,
+ "loss": 0.3898,
+ "step": 560
+ },
+ {
+ "epoch": 3.0,
+ "grad_norm": 6.1470160484313965,
+ "learning_rate": 1.79198623329424e-05,
+ "loss": 0.4347,
+ "step": 570
+ },
+ {
+ "epoch": 3.0526315789473686,
+ "grad_norm": 6.080893039703369,
+ "learning_rate": 1.711411653786861e-05,
+ "loss": 0.2771,
+ "step": 580
+ },
+ {
+ "epoch": 3.1052631578947367,
+ "grad_norm": 3.995936155319214,
+ "learning_rate": 1.6317368678879495e-05,
+ "loss": 0.2786,
+ "step": 590
+ },
+ {
+ "epoch": 3.1578947368421053,
+ "grad_norm": 4.9943084716796875,
+ "learning_rate": 1.55305278597356e-05,
+ "loss": 0.2743,
+ "step": 600
+ },
+ {
+ "epoch": 3.1578947368421053,
+ "eval_loss": 0.6151354908943176,
+ "eval_runtime": 1.9185,
+ "eval_samples_per_second": 41.7,
+ "eval_steps_per_second": 5.212,
+ "step": 600
+ },
+ {
+ "epoch": 3.2105263157894735,
+ "grad_norm": 3.650193452835083,
+ "learning_rate": 1.475449188008532e-05,
+ "loss": 0.2611,
+ "step": 610
+ },
+ {
+ "epoch": 3.263157894736842,
+ "grad_norm": 3.5425643920898438,
+ "learning_rate": 1.399014621105914e-05,
+ "loss": 0.237,
+ "step": 620
+ },
+ {
+ "epoch": 3.3157894736842106,
+ "grad_norm": 4.187167644500732,
+ "learning_rate": 1.3238362984931113e-05,
+ "loss": 0.2319,
+ "step": 630
+ },
+ {
+ "epoch": 3.3684210526315788,
+ "grad_norm": 3.7174108028411865,
+ "learning_rate": 1.2500000000000006e-05,
+ "loss": 0.2785,
+ "step": 640
+ },
+ {
+ "epoch": 3.4210526315789473,
+ "grad_norm": 4.665218353271484,
+ "learning_rate": 1.1775899741825947e-05,
+ "loss": 0.3323,
+ "step": 650
+ },
+ {
+ "epoch": 3.473684210526316,
+ "grad_norm": 6.711315631866455,
+ "learning_rate": 1.1066888421939093e-05,
+ "loss": 0.2762,
+ "step": 660
+ },
+ {
+ "epoch": 3.526315789473684,
+ "grad_norm": 4.101406097412109,
+ "learning_rate": 1.0373775035117305e-05,
+ "loss": 0.2982,
+ "step": 670
+ },
+ {
+ "epoch": 3.5789473684210527,
+ "grad_norm": 3.3571157455444336,
+ "learning_rate": 9.697350436308427e-06,
+ "loss": 0.2338,
+ "step": 680
+ },
+ {
+ "epoch": 3.6315789473684212,
+ "grad_norm": 7.152629852294922,
+ "learning_rate": 9.038386438250415e-06,
+ "loss": 0.2962,
+ "step": 690
+ },
+ {
+ "epoch": 3.6842105263157894,
+ "grad_norm": 5.147871971130371,
+ "learning_rate": 8.397634930819021e-06,
+ "loss": 0.2846,
+ "step": 700
+ },
+ {
+ "epoch": 3.6842105263157894,
+ "eval_loss": 0.6083844900131226,
+ "eval_runtime": 1.9199,
+ "eval_samples_per_second": 41.67,
+ "eval_steps_per_second": 5.209,
+ "step": 700
+ },
+ {
+ "epoch": 3.736842105263158,
+ "grad_norm": 3.984264373779297,
+ "learning_rate": 7.775827023107835e-06,
+ "loss": 0.2895,
+ "step": 710
+ },
+ {
+ "epoch": 3.7894736842105265,
+ "grad_norm": 6.230710983276367,
+ "learning_rate": 7.173672209219495e-06,
+ "loss": 0.3261,
+ "step": 720
+ },
+ {
+ "epoch": 3.8421052631578947,
+ "grad_norm": 3.685063362121582,
+ "learning_rate": 6.591857558720071e-06,
+ "loss": 0.2358,
+ "step": 730
+ },
+ {
+ "epoch": 3.8947368421052633,
+ "grad_norm": 4.337435245513916,
+ "learning_rate": 6.031046932680229e-06,
+ "loss": 0.2723,
+ "step": 740
+ },
+ {
+ "epoch": 3.9473684210526314,
+ "grad_norm": 4.504445552825928,
+ "learning_rate": 5.491880226197707e-06,
+ "loss": 0.2941,
+ "step": 750
+ },
+ {
+ "epoch": 4.0,
+ "grad_norm": 4.7959442138671875,
+ "learning_rate": 4.9749726382653905e-06,
+ "loss": 0.2721,
+ "step": 760
+ },
+ {
+ "epoch": 4.052631578947368,
+ "grad_norm": 2.663322925567627,
+ "learning_rate": 4.480913969818098e-06,
+ "loss": 0.1677,
+ "step": 770
+ },
+ {
+ "epoch": 4.105263157894737,
+ "grad_norm": 5.704188346862793,
+ "learning_rate": 4.010267950759025e-06,
+ "loss": 0.2291,
+ "step": 780
+ },
+ {
+ "epoch": 4.157894736842105,
+ "grad_norm": 4.857370853424072,
+ "learning_rate": 3.5635715967337223e-06,
+ "loss": 0.1991,
+ "step": 790
+ },
+ {
+ "epoch": 4.2105263157894735,
+ "grad_norm": 2.6290528774261475,
+ "learning_rate": 3.141334596385448e-06,
+ "loss": 0.2069,
+ "step": 800
+ },
+ {
+ "epoch": 4.2105263157894735,
+ "eval_loss": 0.6427180767059326,
+ "eval_runtime": 1.9195,
+ "eval_samples_per_second": 41.677,
+ "eval_steps_per_second": 5.21,
+ "step": 800
+ },
+ {
+ "epoch": 4.2631578947368425,
+ "grad_norm": 6.7939558029174805,
+ "learning_rate": 2.7440387297912123e-06,
+ "loss": 0.2213,
+ "step": 810
+ },
+ {
+ "epoch": 4.315789473684211,
+ "grad_norm": 5.425328731536865,
+ "learning_rate": 2.372137318741968e-06,
+ "loss": 0.2008,
+ "step": 820
+ },
+ {
+ "epoch": 4.368421052631579,
+ "grad_norm": 3.0159809589385986,
+ "learning_rate": 2.026054709494235e-06,
+ "loss": 0.2178,
+ "step": 830
+ },
+ {
+ "epoch": 4.421052631578947,
+ "grad_norm": 4.54276704788208,
+ "learning_rate": 1.7061857885832893e-06,
+ "loss": 0.1878,
+ "step": 840
+ },
+ {
+ "epoch": 4.473684210526316,
+ "grad_norm": 4.1157755851745605,
+ "learning_rate": 1.4128955322504966e-06,
+ "loss": 0.1733,
+ "step": 850
+ },
+ {
+ "epoch": 4.526315789473684,
+ "grad_norm": 4.860106945037842,
+ "learning_rate": 1.1465185899987797e-06,
+ "loss": 0.193,
+ "step": 860
+ },
+ {
+ "epoch": 4.578947368421053,
+ "grad_norm": 4.945047378540039,
+ "learning_rate": 9.073589027514789e-07,
+ "loss": 0.1802,
+ "step": 870
+ },
+ {
+ "epoch": 4.631578947368421,
+ "grad_norm": 2.316741943359375,
+ "learning_rate": 6.956893560502359e-07,
+ "loss": 0.1736,
+ "step": 880
+ },
+ {
+ "epoch": 4.684210526315789,
+ "grad_norm": 4.012813091278076,
+ "learning_rate": 5.117514686876379e-07,
+ "loss": 0.1761,
+ "step": 890
+ },
+ {
+ "epoch": 4.7368421052631575,
+ "grad_norm": 5.301681995391846,
+ "learning_rate": 3.557551171299051e-07,
+ "loss": 0.172,
+ "step": 900
+ },
+ {
+ "epoch": 4.7368421052631575,
+ "eval_loss": 0.6494551301002502,
+ "eval_runtime": 1.9201,
+ "eval_samples_per_second": 41.665,
+ "eval_steps_per_second": 5.208,
+ "step": 900
+ },
+ {
+ "epoch": 4.7894736842105265,
+ "grad_norm": 3.559140205383301,
+ "learning_rate": 2.27878296044029e-07,
+ "loss": 0.1734,
+ "step": 910
+ },
+ {
+ "epoch": 4.842105263157895,
+ "grad_norm": 7.743849277496338,
+ "learning_rate": 1.2826691520262114e-07,
+ "loss": 0.1954,
+ "step": 920
+ },
+ {
+ "epoch": 4.894736842105263,
+ "grad_norm": 3.5408854484558105,
+ "learning_rate": 5.7034632998231865e-08,
+ "loss": 0.1744,
+ "step": 930
+ },
+ {
+ "epoch": 4.947368421052632,
+ "grad_norm": 2.413121461868286,
+ "learning_rate": 1.4262726757049982e-08,
+ "loss": 0.1778,
+ "step": 940
+ },
+ {
+ "epoch": 5.0,
+ "grad_norm": 2.56962513923645,
+ "learning_rate": 0.0,
+ "loss": 0.1836,
+ "step": 950
+ },
+ {
+ "epoch": 5.0,
+ "step": 950,
+ "total_flos": 2.0275085174217114e+17,
+ "train_loss": 0.5822552880487945,
+ "train_runtime": 660.0352,
+ "train_samples_per_second": 11.515,
+ "train_steps_per_second": 1.439
+ }
+ ],
+ "logging_steps": 10,
+ "max_steps": 950,
+ "num_input_tokens_seen": 0,
+ "num_train_epochs": 5,
+ "save_steps": 100,
+ "total_flos": 2.0275085174217114e+17,
+ "train_batch_size": 4,
+ "trial_name": null,
+ "trial_params": null
+}
diff --git a/llama2_13b_peft/linguistics_puzzles/training_args.bin b/llama2_13b_peft/linguistics_puzzles/training_args.bin
new file mode 100644
index 0000000000000000000000000000000000000000..998f813cd6613d304b43fa85e7995b297053a484
--- /dev/null
+++ b/llama2_13b_peft/linguistics_puzzles/training_args.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:67ac58d8b967dcc701c74de72e5e18349db160299022d297808b6aa2f75860a0
+size 5176
diff --git a/llama2_13b_peft/linguistics_puzzles/training_eval_loss.png b/llama2_13b_peft/linguistics_puzzles/training_eval_loss.png
new file mode 100644
index 0000000000000000000000000000000000000000..f9e938a29eab3735e50f5ced837e7027bc478ee8
Binary files /dev/null and b/llama2_13b_peft/linguistics_puzzles/training_eval_loss.png differ
diff --git a/llama2_13b_peft/linguistics_puzzles/training_loss.png b/llama2_13b_peft/linguistics_puzzles/training_loss.png
new file mode 100644
index 0000000000000000000000000000000000000000..56ee27bf2bef7198c03838f4403ee262b08e15ad
Binary files /dev/null and b/llama2_13b_peft/linguistics_puzzles/training_loss.png differ
diff --git a/llama2_13b_peft/news_commentary_de/README.md b/llama2_13b_peft/news_commentary_de/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..fd3b86616b694273bc9e706a5e8ed3747ef141f3
--- /dev/null
+++ b/llama2_13b_peft/news_commentary_de/README.md
@@ -0,0 +1,85 @@
+---
+license: other
+library_name: peft
+tags:
+- llama-factory
+- lora
+- generated_from_trainer
+base_model: /data1/model/llama2/meta-llama/Llama2-13b
+model-index:
+- name: news_commentary_de_no_sys
+ results: []
+---
+
+
+
+# news_commentary_de_no_sys
+
+This model is a fine-tuned version of the base model stored at the local path `/data1/model/llama2/meta-llama/Llama2-13b`, trained on the news_commentary_de_no_sys dataset.
+It achieves the following results on the evaluation set:
+- Loss: 0.6944
+
+## Model description
+
+More information needed
+
+## Intended uses & limitations
+
+More information needed
+
+## Training and evaluation data
+
+More information needed
+
+## Training procedure
+
+### Training hyperparameters
+
+The following hyperparameters were used during training:
+- learning_rate: 1e-05
+- train_batch_size: 8
+- eval_batch_size: 8
+- seed: 42
+- distributed_type: multi-GPU
+- num_devices: 2
+- total_train_batch_size: 16
+- total_eval_batch_size: 16
+- optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
+- lr_scheduler_type: cosine
+- lr_scheduler_warmup_steps: 20
+- num_epochs: 10.0
+
+### Training results
+
+| Training Loss | Epoch | Step | Validation Loss |
+|:-------------:|:-----:|:----:|:---------------:|
+| 0.7429 | 0.13 | 200 | 0.7712 |
+| 0.7549 | 0.25 | 400 | 0.7434 |
+| 0.7552 | 0.38 | 600 | 0.7330 |
+| 0.7265 | 0.5 | 800 | 0.7256 |
+| 0.7524 | 0.63 | 1000 | 0.7200 |
+| 0.6976 | 0.75 | 1200 | 0.7151 |
+| 0.7408 | 0.88 | 1400 | 0.7116 |
+| 0.701 | 1.0 | 1600 | 0.7085 |
+| 0.7084 | 1.13 | 1800 | 0.7059 |
+| 0.6999 | 1.25 | 2000 | 0.7040 |
+| 0.7182 | 1.38 | 2200 | 0.7022 |
+| 0.7267 | 1.51 | 2400 | 0.6994 |
+| 0.6912 | 1.63 | 2600 | 0.6972 |
+| 0.6821 | 1.76 | 2800 | 0.6954 |
+| 0.7104 | 1.88 | 3000 | 0.6944 |
+| 0.6222 | 2.01 | 3200 | 0.6934 |
+| 0.6383 | 2.13 | 3400 | 0.6974 |
+| 0.6436 | 2.26 | 3600 | 0.6981 |
+| 0.6444 | 2.38 | 3800 | 0.6968 |
+| 0.6368 | 2.51 | 4000 | 0.6987 |
+
+
+### Framework versions
+
+- PEFT 0.9.0
+- Transformers 4.38.2
+- Pytorch 2.2.1
+- Datasets 2.18.0
+- Tokenizers 0.15.2
\ No newline at end of file
diff --git a/llama2_13b_peft/news_commentary_de/adapter_config.json b/llama2_13b_peft/news_commentary_de/adapter_config.json
new file mode 100644
index 0000000000000000000000000000000000000000..9b194b947adda4e9dcd02e2d860237742eda5a32
--- /dev/null
+++ b/llama2_13b_peft/news_commentary_de/adapter_config.json
@@ -0,0 +1,33 @@
+{
+ "alpha_pattern": {},
+ "auto_mapping": null,
+ "base_model_name_or_path": "/data1/model/llama2/meta-llama/Llama2-13b",
+ "bias": "none",
+ "fan_in_fan_out": false,
+ "inference_mode": true,
+ "init_lora_weights": true,
+ "layers_pattern": null,
+ "layers_to_transform": null,
+ "loftq_config": {},
+ "lora_alpha": 16,
+ "lora_dropout": 0.0,
+ "megatron_config": null,
+ "megatron_core": "megatron.core",
+ "modules_to_save": null,
+ "peft_type": "LORA",
+ "r": 8,
+ "rank_pattern": {},
+ "revision": null,
+ "target_modules": [
+ "up_proj",
+ "down_proj",
+ "v_proj",
+ "k_proj",
+ "o_proj",
+ "gate_proj",
+ "q_proj"
+ ],
+ "task_type": "CAUSAL_LM",
+ "use_dora": false,
+ "use_rslora": false
+}
\ No newline at end of file
diff --git a/llama2_13b_peft/news_commentary_de/adapter_model.safetensors b/llama2_13b_peft/news_commentary_de/adapter_model.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..c87e568a8d15429a2724b153c7cfc1503989dd8c
--- /dev/null
+++ b/llama2_13b_peft/news_commentary_de/adapter_model.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:44a2422055e9542643288a7443b823001443ae5a402e2cff85e691f7121a6398
+size 125248064
diff --git a/llama2_13b_peft/news_commentary_de/all_results.json b/llama2_13b_peft/news_commentary_de/all_results.json
new file mode 100644
index 0000000000000000000000000000000000000000..7f983a27ebbf7d87dd34440f5b3ab768de93a3a2
--- /dev/null
+++ b/llama2_13b_peft/news_commentary_de/all_results.json
@@ -0,0 +1,11 @@
+{
+ "epoch": 2.51,
+ "eval_loss": 0.6943792104721069,
+ "eval_runtime": 64.8294,
+ "eval_samples_per_second": 69.413,
+ "eval_steps_per_second": 4.35,
+ "train_loss": 0.7081527805328369,
+ "train_runtime": 4312.5386,
+ "train_samples_per_second": 59.13,
+ "train_steps_per_second": 3.696
+}
\ No newline at end of file
diff --git a/llama2_13b_peft/news_commentary_de/eval_results.json b/llama2_13b_peft/news_commentary_de/eval_results.json
new file mode 100644
index 0000000000000000000000000000000000000000..be510258d76594c25b96d4a80a69af1b0819d82d
--- /dev/null
+++ b/llama2_13b_peft/news_commentary_de/eval_results.json
@@ -0,0 +1,7 @@
+{
+ "epoch": 2.51,
+ "eval_loss": 0.6943792104721069,
+ "eval_runtime": 64.8294,
+ "eval_samples_per_second": 69.413,
+ "eval_steps_per_second": 4.35
+}
\ No newline at end of file
diff --git a/llama2_13b_peft/news_commentary_de/special_tokens_map.json b/llama2_13b_peft/news_commentary_de/special_tokens_map.json
new file mode 100644
index 0000000000000000000000000000000000000000..72ecfeeb7e14d244c936169d2ed139eeae235ef1
--- /dev/null
+++ b/llama2_13b_peft/news_commentary_de/special_tokens_map.json
@@ -0,0 +1,24 @@
+{
+ "bos_token": {
+ "content": "<s>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false
+ },
+ "eos_token": {
+ "content": "",
+ "content": "</s>",
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false
+ },
+ "pad_token": "</s>",
+ "unk_token": {
+ "content": "<unk>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false
+ }
+}
diff --git a/llama2_13b_peft/news_commentary_de/tokenizer.model b/llama2_13b_peft/news_commentary_de/tokenizer.model
new file mode 100644
index 0000000000000000000000000000000000000000..6c00c742ce03c627d6cd5b795984876fa49fa899
--- /dev/null
+++ b/llama2_13b_peft/news_commentary_de/tokenizer.model
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9e556afd44213b6bd1be2b850ebbbd98f5481437a8021afaf58ee7fb1818d347
+size 499723
diff --git a/llama2_13b_peft/news_commentary_de/tokenizer_config.json b/llama2_13b_peft/news_commentary_de/tokenizer_config.json
new file mode 100644
index 0000000000000000000000000000000000000000..a40266f39e5b5fed14de34710d35eb9e98d6bdad
--- /dev/null
+++ b/llama2_13b_peft/news_commentary_de/tokenizer_config.json
@@ -0,0 +1,45 @@
+{
+ "add_bos_token": true,
+ "add_eos_token": false,
+ "add_prefix_space": true,
+ "added_tokens_decoder": {
+ "0": {
+ "content": "<unk>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "1": {
+ "content": "<s>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "2": {
+ "content": "</s>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ }
+ },
+ "bos_token": "<s>",
+ "chat_template": "{% if messages[0]['role'] == 'system' %}{% set system_message = messages[0]['content'] %}{% endif %}{% if system_message is defined %}{{ system_message }}{% endif %}{% for message in messages %}{% set content = message['content'] %}{% if message['role'] == 'user' %}{{ content }}{% elif message['role'] == 'assistant' %}{{ content + '\\n' }}{% endif %}{% endfor %}",
+ "clean_up_tokenization_spaces": false,
+ "eos_token": "</s>",
+ "legacy": true,
+ "model_max_length": 1000000000000000019884624838656,
+ "pad_token": "</s>",
+ "padding_side": "right",
+ "sp_model_kwargs": {},
+ "spaces_between_special_tokens": false,
+ "split_special_tokens": false,
+ "tokenizer_class": "LlamaTokenizer",
+ "unk_token": "<unk>",
+ "use_default_system_prompt": false
+}
diff --git a/llama2_13b_peft/news_commentary_de/train_results.json b/llama2_13b_peft/news_commentary_de/train_results.json
new file mode 100644
index 0000000000000000000000000000000000000000..1574d04dd4c2f1ff6d4ddf979ddb0f4aef9e188c
--- /dev/null
+++ b/llama2_13b_peft/news_commentary_de/train_results.json
@@ -0,0 +1,7 @@
+{
+ "epoch": 2.51,
+ "train_loss": 0.7081527805328369,
+ "train_runtime": 4312.5386,
+ "train_samples_per_second": 59.13,
+ "train_steps_per_second": 3.696
+}
\ No newline at end of file
diff --git a/llama2_13b_peft/news_commentary_de/trainer_log.jsonl b/llama2_13b_peft/news_commentary_de/trainer_log.jsonl
new file mode 100644
index 0000000000000000000000000000000000000000..be1137ec35039ee7721d558e8f2fa1a93b15a88b
--- /dev/null
+++ b/llama2_13b_peft/news_commentary_de/trainer_log.jsonl
@@ -0,0 +1,422 @@
+{"current_steps": 10, "total_steps": 15940, "loss": 1.3994, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 5e-06, "epoch": 0.01, "percentage": 0.06, "elapsed_time": "0:00:10", "remaining_time": "4:29:17"}
+{"current_steps": 20, "total_steps": 15940, "loss": 1.4561, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1e-05, "epoch": 0.01, "percentage": 0.13, "elapsed_time": "0:00:16", "remaining_time": "3:40:16"}
+{"current_steps": 30, "total_steps": 15940, "loss": 1.3697, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.999990264607035e-06, "epoch": 0.02, "percentage": 0.19, "elapsed_time": "0:00:23", "remaining_time": "3:30:45"}
+{"current_steps": 40, "total_steps": 15940, "loss": 1.3627, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.999961058466052e-06, "epoch": 0.03, "percentage": 0.25, "elapsed_time": "0:00:30", "remaining_time": "3:21:30"}
+{"current_steps": 50, "total_steps": 15940, "loss": 1.1155, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.999912381690781e-06, "epoch": 0.03, "percentage": 0.31, "elapsed_time": "0:00:40", "remaining_time": "3:33:21"}
+{"current_steps": 60, "total_steps": 15940, "loss": 0.9492, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.999844234470782e-06, "epoch": 0.04, "percentage": 0.38, "elapsed_time": "0:00:46", "remaining_time": "3:26:39"}
+{"current_steps": 70, "total_steps": 15940, "loss": 0.9067, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.999756617071427e-06, "epoch": 0.04, "percentage": 0.44, "elapsed_time": "0:00:53", "remaining_time": "3:22:54"}
+{"current_steps": 80, "total_steps": 15940, "loss": 0.8848, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.999649529833915e-06, "epoch": 0.05, "percentage": 0.5, "elapsed_time": "0:01:00", "remaining_time": "3:20:21"}
+{"current_steps": 90, "total_steps": 15940, "loss": 0.798, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.999522973175257e-06, "epoch": 0.06, "percentage": 0.56, "elapsed_time": "0:01:10", "remaining_time": "3:27:40"}
+{"current_steps": 100, "total_steps": 15940, "loss": 0.8782, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.999376947588288e-06, "epoch": 0.06, "percentage": 0.63, "elapsed_time": "0:01:17", "remaining_time": "3:24:28"}
+{"current_steps": 110, "total_steps": 15940, "loss": 0.8124, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.99921145364165e-06, "epoch": 0.07, "percentage": 0.69, "elapsed_time": "0:01:26", "remaining_time": "3:26:46"}
+{"current_steps": 120, "total_steps": 15940, "loss": 0.838, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.999026491979809e-06, "epoch": 0.08, "percentage": 0.75, "elapsed_time": "0:01:32", "remaining_time": "3:23:53"}
+{"current_steps": 130, "total_steps": 15940, "loss": 0.8383, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.99882206332303e-06, "epoch": 0.08, "percentage": 0.82, "elapsed_time": "0:01:39", "remaining_time": "3:22:07"}
+{"current_steps": 140, "total_steps": 15940, "loss": 0.8705, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.99859816846739e-06, "epoch": 0.09, "percentage": 0.88, "elapsed_time": "0:01:47", "remaining_time": "3:23:01"}
+{"current_steps": 150, "total_steps": 15940, "loss": 0.7872, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.998354808284774e-06, "epoch": 0.09, "percentage": 0.94, "elapsed_time": "0:01:54", "remaining_time": "3:20:22"}
+{"current_steps": 160, "total_steps": 15940, "loss": 0.789, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.998091983722862e-06, "epoch": 0.1, "percentage": 1.0, "elapsed_time": "0:02:02", "remaining_time": "3:21:56"}
+{"current_steps": 170, "total_steps": 15940, "loss": 0.7749, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.997809695805136e-06, "epoch": 0.11, "percentage": 1.07, "elapsed_time": "0:02:10", "remaining_time": "3:22:13"}
+{"current_steps": 180, "total_steps": 15940, "loss": 0.7935, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.99750794563087e-06, "epoch": 0.11, "percentage": 1.13, "elapsed_time": "0:02:18", "remaining_time": "3:22:32"}
+{"current_steps": 190, "total_steps": 15940, "loss": 0.7817, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.997186734375124e-06, "epoch": 0.12, "percentage": 1.19, "elapsed_time": "0:02:25", "remaining_time": "3:21:34"}
+{"current_steps": 200, "total_steps": 15940, "loss": 0.7429, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.996846063288746e-06, "epoch": 0.13, "percentage": 1.25, "elapsed_time": "0:02:33", "remaining_time": "3:20:50"}
+{"current_steps": 200, "total_steps": 15940, "loss": null, "eval_loss": 0.7712445855140686, "predict_loss": null, "reward": null, "learning_rate": null, "epoch": 0.13, "percentage": 1.25, "elapsed_time": "0:02:33", "remaining_time": "3:20:50"}
+{"current_steps": 210, "total_steps": 15940, "loss": 0.7636, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.996485933698364e-06, "epoch": 0.13, "percentage": 1.32, "elapsed_time": "0:03:46", "remaining_time": "4:42:21"}
+{"current_steps": 220, "total_steps": 15940, "loss": 0.7856, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.996106347006378e-06, "epoch": 0.14, "percentage": 1.38, "elapsed_time": "0:03:52", "remaining_time": "4:36:29"}
+{"current_steps": 230, "total_steps": 15940, "loss": 0.7529, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.99570730469096e-06, "epoch": 0.14, "percentage": 1.44, "elapsed_time": "0:03:58", "remaining_time": "4:31:20"}
+{"current_steps": 240, "total_steps": 15940, "loss": 0.7671, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.995288808306041e-06, "epoch": 0.15, "percentage": 1.51, "elapsed_time": "0:04:07", "remaining_time": "4:30:22"}
+{"current_steps": 250, "total_steps": 15940, "loss": 0.7231, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.994850859481312e-06, "epoch": 0.16, "percentage": 1.57, "elapsed_time": "0:04:16", "remaining_time": "4:27:58"}
+{"current_steps": 260, "total_steps": 15940, "loss": 0.7694, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.994393459922219e-06, "epoch": 0.16, "percentage": 1.63, "elapsed_time": "0:04:24", "remaining_time": "4:25:29"}
+{"current_steps": 270, "total_steps": 15940, "loss": 0.7661, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.993916611409941e-06, "epoch": 0.17, "percentage": 1.69, "elapsed_time": "0:04:30", "remaining_time": "4:21:37"}
+{"current_steps": 280, "total_steps": 15940, "loss": 0.7952, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.993420315801406e-06, "epoch": 0.18, "percentage": 1.76, "elapsed_time": "0:04:37", "remaining_time": "4:18:35"}
+{"current_steps": 290, "total_steps": 15940, "loss": 0.7966, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.992904575029265e-06, "epoch": 0.18, "percentage": 1.82, "elapsed_time": "0:04:44", "remaining_time": "4:16:09"}
+{"current_steps": 300, "total_steps": 15940, "loss": 0.8167, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.992369391101895e-06, "epoch": 0.19, "percentage": 1.88, "elapsed_time": "0:04:51", "remaining_time": "4:13:29"}
+{"current_steps": 310, "total_steps": 15940, "loss": 0.7368, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.991814766103386e-06, "epoch": 0.19, "percentage": 1.94, "elapsed_time": "0:04:58", "remaining_time": "4:11:15"}
+{"current_steps": 320, "total_steps": 15940, "loss": 0.7796, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.991240702193532e-06, "epoch": 0.2, "percentage": 2.01, "elapsed_time": "0:05:06", "remaining_time": "4:09:03"}
+{"current_steps": 330, "total_steps": 15940, "loss": 0.7727, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.99064720160783e-06, "epoch": 0.21, "percentage": 2.07, "elapsed_time": "0:05:15", "remaining_time": "4:09:07"}
+{"current_steps": 340, "total_steps": 15940, "loss": 0.7604, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.990034266657468e-06, "epoch": 0.21, "percentage": 2.13, "elapsed_time": "0:05:24", "remaining_time": "4:07:46"}
+{"current_steps": 350, "total_steps": 15940, "loss": 0.7399, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.989401899729307e-06, "epoch": 0.22, "percentage": 2.2, "elapsed_time": "0:05:31", "remaining_time": "4:06:03"}
+{"current_steps": 360, "total_steps": 15940, "loss": 0.7715, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.988750103285883e-06, "epoch": 0.23, "percentage": 2.26, "elapsed_time": "0:05:39", "remaining_time": "4:04:34"}
+{"current_steps": 370, "total_steps": 15940, "loss": 0.738, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.988078879865396e-06, "epoch": 0.23, "percentage": 2.32, "elapsed_time": "0:05:47", "remaining_time": "4:03:51"}
+{"current_steps": 380, "total_steps": 15940, "loss": 0.8025, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.987388232081694e-06, "epoch": 0.24, "percentage": 2.38, "elapsed_time": "0:05:55", "remaining_time": "4:02:42"}
+{"current_steps": 390, "total_steps": 15940, "loss": 0.7561, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.98667816262427e-06, "epoch": 0.24, "percentage": 2.45, "elapsed_time": "0:06:02", "remaining_time": "4:00:39"}
+{"current_steps": 400, "total_steps": 15940, "loss": 0.7549, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.985948674258243e-06, "epoch": 0.25, "percentage": 2.51, "elapsed_time": "0:06:09", "remaining_time": "3:58:57"}
+{"current_steps": 400, "total_steps": 15940, "loss": null, "eval_loss": 0.743410587310791, "predict_loss": null, "reward": null, "learning_rate": null, "epoch": 0.25, "percentage": 2.51, "elapsed_time": "0:06:09", "remaining_time": "3:58:57"}
+{"current_steps": 410, "total_steps": 15940, "loss": 0.7694, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.985199769824359e-06, "epoch": 0.26, "percentage": 2.57, "elapsed_time": "0:07:22", "remaining_time": "4:39:13"}
+{"current_steps": 420, "total_steps": 15940, "loss": 0.7353, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.984431452238968e-06, "epoch": 0.26, "percentage": 2.63, "elapsed_time": "0:07:29", "remaining_time": "4:36:35"}
+{"current_steps": 430, "total_steps": 15940, "loss": 0.7299, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.983643724494017e-06, "epoch": 0.27, "percentage": 2.7, "elapsed_time": "0:07:35", "remaining_time": "4:33:43"}
+{"current_steps": 440, "total_steps": 15940, "loss": 0.754, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.982836589657043e-06, "epoch": 0.28, "percentage": 2.76, "elapsed_time": "0:07:42", "remaining_time": "4:31:15"}
+{"current_steps": 450, "total_steps": 15940, "loss": 0.7355, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.98201005087116e-06, "epoch": 0.28, "percentage": 2.82, "elapsed_time": "0:07:48", "remaining_time": "4:29:03"}
+{"current_steps": 460, "total_steps": 15940, "loss": 0.7543, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.981164111355036e-06, "epoch": 0.29, "percentage": 2.89, "elapsed_time": "0:07:56", "remaining_time": "4:27:23"}
+{"current_steps": 470, "total_steps": 15940, "loss": 0.7568, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.98029877440289e-06, "epoch": 0.29, "percentage": 2.95, "elapsed_time": "0:08:03", "remaining_time": "4:25:00"}
+{"current_steps": 480, "total_steps": 15940, "loss": 0.7313, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.979414043384485e-06, "epoch": 0.3, "percentage": 3.01, "elapsed_time": "0:08:10", "remaining_time": "4:23:26"}
+{"current_steps": 490, "total_steps": 15940, "loss": 0.7456, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.978509921745101e-06, "epoch": 0.31, "percentage": 3.07, "elapsed_time": "0:08:17", "remaining_time": "4:21:27"}
+{"current_steps": 500, "total_steps": 15940, "loss": 0.7585, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.97758641300553e-06, "epoch": 0.31, "percentage": 3.14, "elapsed_time": "0:08:24", "remaining_time": "4:19:29"}
+{"current_steps": 510, "total_steps": 15940, "loss": 0.7311, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.97664352076206e-06, "epoch": 0.32, "percentage": 3.2, "elapsed_time": "0:08:30", "remaining_time": "4:17:28"}
+{"current_steps": 520, "total_steps": 15940, "loss": 0.7173, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.97568124868646e-06, "epoch": 0.33, "percentage": 3.26, "elapsed_time": "0:08:37", "remaining_time": "4:15:51"}
+{"current_steps": 530, "total_steps": 15940, "loss": 0.7408, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.974699600525972e-06, "epoch": 0.33, "percentage": 3.32, "elapsed_time": "0:08:45", "remaining_time": "4:14:32"}
+{"current_steps": 540, "total_steps": 15940, "loss": 0.757, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.973698580103286e-06, "epoch": 0.34, "percentage": 3.39, "elapsed_time": "0:08:52", "remaining_time": "4:13:06"}
+{"current_steps": 550, "total_steps": 15940, "loss": 0.7717, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.972678191316533e-06, "epoch": 0.35, "percentage": 3.45, "elapsed_time": "0:08:59", "remaining_time": "4:11:22"}
+{"current_steps": 560, "total_steps": 15940, "loss": 0.7314, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.971638438139266e-06, "epoch": 0.35, "percentage": 3.51, "elapsed_time": "0:09:08", "remaining_time": "4:10:58"}
+{"current_steps": 570, "total_steps": 15940, "loss": 0.7112, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.97057932462045e-06, "epoch": 0.36, "percentage": 3.58, "elapsed_time": "0:09:14", "remaining_time": "4:09:21"}
+{"current_steps": 580, "total_steps": 15940, "loss": 0.7802, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.96950085488444e-06, "epoch": 0.36, "percentage": 3.64, "elapsed_time": "0:09:20", "remaining_time": "4:07:31"}
+{"current_steps": 590, "total_steps": 15940, "loss": 0.7472, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.968403033130963e-06, "epoch": 0.37, "percentage": 3.7, "elapsed_time": "0:09:27", "remaining_time": "4:06:11"}
+{"current_steps": 600, "total_steps": 15940, "loss": 0.7552, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.967285863635112e-06, "epoch": 0.38, "percentage": 3.76, "elapsed_time": "0:09:34", "remaining_time": "4:04:48"}
+{"current_steps": 600, "total_steps": 15940, "loss": null, "eval_loss": 0.733000636100769, "predict_loss": null, "reward": null, "learning_rate": null, "epoch": 0.38, "percentage": 3.76, "elapsed_time": "0:09:34", "remaining_time": "4:04:48"}
+{"current_steps": 610, "total_steps": 15940, "loss": 0.7274, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.966149350747321e-06, "epoch": 0.38, "percentage": 3.83, "elapsed_time": "0:10:50", "remaining_time": "4:32:16"}
+{"current_steps": 620, "total_steps": 15940, "loss": 0.7734, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.964993498893349e-06, "epoch": 0.39, "percentage": 3.89, "elapsed_time": "0:10:56", "remaining_time": "4:30:28"}
+{"current_steps": 630, "total_steps": 15940, "loss": 0.7117, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.963818312574265e-06, "epoch": 0.4, "percentage": 3.95, "elapsed_time": "0:11:03", "remaining_time": "4:28:40"}
+{"current_steps": 640, "total_steps": 15940, "loss": 0.7256, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.962623796366428e-06, "epoch": 0.4, "percentage": 4.02, "elapsed_time": "0:11:10", "remaining_time": "4:27:00"}
+{"current_steps": 650, "total_steps": 15940, "loss": 0.764, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.961409954921472e-06, "epoch": 0.41, "percentage": 4.08, "elapsed_time": "0:11:16", "remaining_time": "4:25:11"}
+{"current_steps": 660, "total_steps": 15940, "loss": 0.7385, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.96017679296629e-06, "epoch": 0.41, "percentage": 4.14, "elapsed_time": "0:11:24", "remaining_time": "4:24:00"}
+{"current_steps": 670, "total_steps": 15940, "loss": 0.7386, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.958924315303005e-06, "epoch": 0.42, "percentage": 4.2, "elapsed_time": "0:11:32", "remaining_time": "4:22:59"}
+{"current_steps": 680, "total_steps": 15940, "loss": 0.7013, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.95765252680896e-06, "epoch": 0.43, "percentage": 4.27, "elapsed_time": "0:11:38", "remaining_time": "4:21:18"}
+{"current_steps": 690, "total_steps": 15940, "loss": 0.7104, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.956361432436705e-06, "epoch": 0.43, "percentage": 4.33, "elapsed_time": "0:11:45", "remaining_time": "4:20:01"}
+{"current_steps": 700, "total_steps": 15940, "loss": 0.6988, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.95505103721396e-06, "epoch": 0.44, "percentage": 4.39, "elapsed_time": "0:11:54", "remaining_time": "4:19:25"}
+{"current_steps": 710, "total_steps": 15940, "loss": 0.7177, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.953721346243613e-06, "epoch": 0.45, "percentage": 4.45, "elapsed_time": "0:12:04", "remaining_time": "4:18:50"}
+{"current_steps": 720, "total_steps": 15940, "loss": 0.6804, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.952372364703688e-06, "epoch": 0.45, "percentage": 4.52, "elapsed_time": "0:12:10", "remaining_time": "4:17:32"}
+{"current_steps": 730, "total_steps": 15940, "loss": 0.7432, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.95100409784733e-06, "epoch": 0.46, "percentage": 4.58, "elapsed_time": "0:12:18", "remaining_time": "4:16:31"}
+{"current_steps": 740, "total_steps": 15940, "loss": 0.7521, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.949616551002787e-06, "epoch": 0.46, "percentage": 4.64, "elapsed_time": "0:12:25", "remaining_time": "4:15:08"}
+{"current_steps": 750, "total_steps": 15940, "loss": 0.7624, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.948209729573384e-06, "epoch": 0.47, "percentage": 4.71, "elapsed_time": "0:12:32", "remaining_time": "4:13:54"}
+{"current_steps": 760, "total_steps": 15940, "loss": 0.7619, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.946783639037503e-06, "epoch": 0.48, "percentage": 4.77, "elapsed_time": "0:12:39", "remaining_time": "4:12:40"}
+{"current_steps": 770, "total_steps": 15940, "loss": 0.7159, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.945338284948568e-06, "epoch": 0.48, "percentage": 4.83, "elapsed_time": "0:12:47", "remaining_time": "4:11:56"}
+{"current_steps": 780, "total_steps": 15940, "loss": 0.7621, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.943873672935014e-06, "epoch": 0.49, "percentage": 4.89, "elapsed_time": "0:12:55", "remaining_time": "4:11:17"}
+{"current_steps": 790, "total_steps": 15940, "loss": 0.6923, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.94238980870027e-06, "epoch": 0.5, "percentage": 4.96, "elapsed_time": "0:13:03", "remaining_time": "4:10:30"}
+{"current_steps": 800, "total_steps": 15940, "loss": 0.7265, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.940886698022733e-06, "epoch": 0.5, "percentage": 5.02, "elapsed_time": "0:13:12", "remaining_time": "4:09:50"}
+{"current_steps": 800, "total_steps": 15940, "loss": null, "eval_loss": 0.7256230711936951, "predict_loss": null, "reward": null, "learning_rate": null, "epoch": 0.5, "percentage": 5.02, "elapsed_time": "0:13:12", "remaining_time": "4:09:50"}
+{"current_steps": 810, "total_steps": 15940, "loss": 0.6976, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.93936434675576e-06, "epoch": 0.51, "percentage": 5.08, "elapsed_time": "0:14:31", "remaining_time": "4:31:20"}
+{"current_steps": 820, "total_steps": 15940, "loss": 0.7028, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.93782276082762e-06, "epoch": 0.51, "percentage": 5.14, "elapsed_time": "0:14:38", "remaining_time": "4:30:00"}
+{"current_steps": 830, "total_steps": 15940, "loss": 0.7253, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.936261946241492e-06, "epoch": 0.52, "percentage": 5.21, "elapsed_time": "0:14:45", "remaining_time": "4:28:47"}
+{"current_steps": 840, "total_steps": 15940, "loss": 0.7096, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.934681909075434e-06, "epoch": 0.53, "percentage": 5.27, "elapsed_time": "0:14:55", "remaining_time": "4:28:15"}
+{"current_steps": 850, "total_steps": 15940, "loss": 0.745, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.93308265548236e-06, "epoch": 0.53, "percentage": 5.33, "elapsed_time": "0:15:02", "remaining_time": "4:27:00"}
+{"current_steps": 860, "total_steps": 15940, "loss": 0.7111, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.931464191690015e-06, "epoch": 0.54, "percentage": 5.4, "elapsed_time": "0:15:10", "remaining_time": "4:25:57"}
+{"current_steps": 870, "total_steps": 15940, "loss": 0.7296, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.929826524000948e-06, "epoch": 0.55, "percentage": 5.46, "elapsed_time": "0:15:19", "remaining_time": "4:25:33"}
+{"current_steps": 880, "total_steps": 15940, "loss": 0.7387, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.928169658792498e-06, "epoch": 0.55, "percentage": 5.52, "elapsed_time": "0:15:29", "remaining_time": "4:25:09"}
+{"current_steps": 890, "total_steps": 15940, "loss": 0.7156, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.926493602516758e-06, "epoch": 0.56, "percentage": 5.58, "elapsed_time": "0:15:37", "remaining_time": "4:24:14"}
+{"current_steps": 900, "total_steps": 15940, "loss": 0.7956, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.924798361700554e-06, "epoch": 0.56, "percentage": 5.65, "elapsed_time": "0:15:48", "remaining_time": "4:24:14"}
+{"current_steps": 910, "total_steps": 15940, "loss": 0.7361, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.923083942945419e-06, "epoch": 0.57, "percentage": 5.71, "elapsed_time": "0:15:56", "remaining_time": "4:23:18"}
+{"current_steps": 920, "total_steps": 15940, "loss": 0.7091, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.92135035292757e-06, "epoch": 0.58, "percentage": 5.77, "elapsed_time": "0:16:05", "remaining_time": "4:22:44"}
+{"current_steps": 930, "total_steps": 15940, "loss": 0.6967, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.919597598397882e-06, "epoch": 0.58, "percentage": 5.83, "elapsed_time": "0:16:13", "remaining_time": "4:21:45"}
+{"current_steps": 940, "total_steps": 15940, "loss": 0.7509, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.91782568618185e-06, "epoch": 0.59, "percentage": 5.9, "elapsed_time": "0:16:21", "remaining_time": "4:20:59"}
+{"current_steps": 950, "total_steps": 15940, "loss": 0.6999, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.916034623179584e-06, "epoch": 0.6, "percentage": 5.96, "elapsed_time": "0:16:29", "remaining_time": "4:20:18"}
+{"current_steps": 960, "total_steps": 15940, "loss": 0.7194, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.914224416365765e-06, "epoch": 0.6, "percentage": 6.02, "elapsed_time": "0:16:38", "remaining_time": "4:19:47"}
+{"current_steps": 970, "total_steps": 15940, "loss": 0.705, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.91239507278962e-06, "epoch": 0.61, "percentage": 6.09, "elapsed_time": "0:16:45", "remaining_time": "4:18:42"}
+{"current_steps": 980, "total_steps": 15940, "loss": 0.7314, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.910546599574903e-06, "epoch": 0.61, "percentage": 6.15, "elapsed_time": "0:16:53", "remaining_time": "4:17:50"}
+{"current_steps": 990, "total_steps": 15940, "loss": 0.7549, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.908679003919856e-06, "epoch": 0.62, "percentage": 6.21, "elapsed_time": "0:17:00", "remaining_time": "4:16:49"}
+{"current_steps": 1000, "total_steps": 15940, "loss": 0.7524, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.906792293097194e-06, "epoch": 0.63, "percentage": 6.27, "elapsed_time": "0:17:08", "remaining_time": "4:16:10"}
+{"current_steps": 1000, "total_steps": 15940, "loss": null, "eval_loss": 0.7200015187263489, "predict_loss": null, "reward": null, "learning_rate": null, "epoch": 0.63, "percentage": 6.27, "elapsed_time": "0:17:08", "remaining_time": "4:16:10"}
+{"current_steps": 1010, "total_steps": 15940, "loss": 0.7218, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.904886474454063e-06, "epoch": 0.63, "percentage": 6.34, "elapsed_time": "0:18:26", "remaining_time": "4:32:34"}
+{"current_steps": 1020, "total_steps": 15940, "loss": 0.7171, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.90296155541202e-06, "epoch": 0.64, "percentage": 6.4, "elapsed_time": "0:18:33", "remaining_time": "4:31:23"}
+{"current_steps": 1030, "total_steps": 15940, "loss": 0.758, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.901017543467005e-06, "epoch": 0.65, "percentage": 6.46, "elapsed_time": "0:18:40", "remaining_time": "4:30:21"}
+{"current_steps": 1040, "total_steps": 15940, "loss": 0.7121, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.899054446189305e-06, "epoch": 0.65, "percentage": 6.52, "elapsed_time": "0:18:47", "remaining_time": "4:29:18"}
+{"current_steps": 1050, "total_steps": 15940, "loss": 0.7088, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.897072271223526e-06, "epoch": 0.66, "percentage": 6.59, "elapsed_time": "0:18:54", "remaining_time": "4:28:07"}
+{"current_steps": 1060, "total_steps": 15940, "loss": 0.7804, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.895071026288574e-06, "epoch": 0.66, "percentage": 6.65, "elapsed_time": "0:19:01", "remaining_time": "4:27:00"}
+{"current_steps": 1070, "total_steps": 15940, "loss": 0.6935, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.893050719177608e-06, "epoch": 0.67, "percentage": 6.71, "elapsed_time": "0:19:07", "remaining_time": "4:25:52"}
+{"current_steps": 1080, "total_steps": 15940, "loss": 0.6894, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.891011357758022e-06, "epoch": 0.68, "percentage": 6.78, "elapsed_time": "0:19:14", "remaining_time": "4:24:42"}
+{"current_steps": 1090, "total_steps": 15940, "loss": 0.7244, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.888952949971411e-06, "epoch": 0.68, "percentage": 6.84, "elapsed_time": "0:19:22", "remaining_time": "4:24:00"}
+{"current_steps": 1100, "total_steps": 15940, "loss": 0.8156, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.886875503833537e-06, "epoch": 0.69, "percentage": 6.9, "elapsed_time": "0:19:29", "remaining_time": "4:22:51"}
+{"current_steps": 1110, "total_steps": 15940, "loss": 0.7478, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.884779027434304e-06, "epoch": 0.7, "percentage": 6.96, "elapsed_time": "0:19:35", "remaining_time": "4:21:50"}
+{"current_steps": 1120, "total_steps": 15940, "loss": 0.7187, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.882663528937716e-06, "epoch": 0.7, "percentage": 7.03, "elapsed_time": "0:19:42", "remaining_time": "4:20:47"}
+{"current_steps": 1130, "total_steps": 15940, "loss": 0.7145, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.880529016581863e-06, "epoch": 0.71, "percentage": 7.09, "elapsed_time": "0:19:51", "remaining_time": "4:20:11"}
+{"current_steps": 1140, "total_steps": 15940, "loss": 0.731, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.878375498678869e-06, "epoch": 0.72, "percentage": 7.15, "elapsed_time": "0:19:57", "remaining_time": "4:19:07"}
+{"current_steps": 1150, "total_steps": 15940, "loss": 0.7323, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.876202983614868e-06, "epoch": 0.72, "percentage": 7.21, "elapsed_time": "0:20:06", "remaining_time": "4:18:33"}
+{"current_steps": 1160, "total_steps": 15940, "loss": 0.6757, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.874011479849981e-06, "epoch": 0.73, "percentage": 7.28, "elapsed_time": "0:20:12", "remaining_time": "4:17:33"}
+{"current_steps": 1170, "total_steps": 15940, "loss": 0.7258, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.871800995918264e-06, "epoch": 0.73, "percentage": 7.34, "elapsed_time": "0:20:19", "remaining_time": "4:16:40"}
+{"current_steps": 1180, "total_steps": 15940, "loss": 0.7334, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.86957154042769e-06, "epoch": 0.74, "percentage": 7.4, "elapsed_time": "0:20:26", "remaining_time": "4:15:43"}
+{"current_steps": 1190, "total_steps": 15940, "loss": 0.7358, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.867323122060108e-06, "epoch": 0.75, "percentage": 7.47, "elapsed_time": "0:20:36", "remaining_time": "4:15:30"}
+{"current_steps": 1200, "total_steps": 15940, "loss": 0.6976, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.865055749571215e-06, "epoch": 0.75, "percentage": 7.53, "elapsed_time": "0:20:45", "remaining_time": "4:14:58"}
+{"current_steps": 1200, "total_steps": 15940, "loss": null, "eval_loss": 0.7151169180870056, "predict_loss": null, "reward": null, "learning_rate": null, "epoch": 0.75, "percentage": 7.53, "elapsed_time": "0:20:45", "remaining_time": "4:14:58"}
+{"current_steps": 1210, "total_steps": 15940, "loss": 0.6983, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.862769431790513e-06, "epoch": 0.76, "percentage": 7.59, "elapsed_time": "0:21:57", "remaining_time": "4:27:14"}
+{"current_steps": 1220, "total_steps": 15940, "loss": 0.7171, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.860464177621286e-06, "epoch": 0.77, "percentage": 7.65, "elapsed_time": "0:22:04", "remaining_time": "4:26:20"}
+{"current_steps": 1230, "total_steps": 15940, "loss": 0.7206, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.858139996040554e-06, "epoch": 0.77, "percentage": 7.72, "elapsed_time": "0:22:14", "remaining_time": "4:26:04"}
+{"current_steps": 1240, "total_steps": 15940, "loss": 0.7368, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.855796896099044e-06, "epoch": 0.78, "percentage": 7.78, "elapsed_time": "0:22:21", "remaining_time": "4:24:58"}
+{"current_steps": 1250, "total_steps": 15940, "loss": 0.7372, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.85343488692116e-06, "epoch": 0.78, "percentage": 7.84, "elapsed_time": "0:22:27", "remaining_time": "4:23:56"}
+{"current_steps": 1260, "total_steps": 15940, "loss": 0.7373, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.851053977704931e-06, "epoch": 0.79, "percentage": 7.9, "elapsed_time": "0:22:34", "remaining_time": "4:22:57"}
+{"current_steps": 1270, "total_steps": 15940, "loss": 0.7608, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.848654177721999e-06, "epoch": 0.8, "percentage": 7.97, "elapsed_time": "0:22:42", "remaining_time": "4:22:17"}
+{"current_steps": 1280, "total_steps": 15940, "loss": 0.7227, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.846235496317556e-06, "epoch": 0.8, "percentage": 8.03, "elapsed_time": "0:22:49", "remaining_time": "4:21:23"}
+{"current_steps": 1290, "total_steps": 15940, "loss": 0.7415, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.843797942910328e-06, "epoch": 0.81, "percentage": 8.09, "elapsed_time": "0:22:56", "remaining_time": "4:20:29"}
+{"current_steps": 1300, "total_steps": 15940, "loss": 0.7206, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.841341526992536e-06, "epoch": 0.82, "percentage": 8.16, "elapsed_time": "0:23:03", "remaining_time": "4:19:39"}
+{"current_steps": 1310, "total_steps": 15940, "loss": 0.6704, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.838866258129847e-06, "epoch": 0.82, "percentage": 8.22, "elapsed_time": "0:23:10", "remaining_time": "4:18:53"}
+{"current_steps": 1320, "total_steps": 15940, "loss": 0.7159, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.836372145961346e-06, "epoch": 0.83, "percentage": 8.28, "elapsed_time": "0:23:19", "remaining_time": "4:18:19"}
+{"current_steps": 1330, "total_steps": 15940, "loss": 0.6916, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.833859200199498e-06, "epoch": 0.83, "percentage": 8.34, "elapsed_time": "0:23:27", "remaining_time": "4:17:44"}
+{"current_steps": 1340, "total_steps": 15940, "loss": 0.7568, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.83132743063011e-06, "epoch": 0.84, "percentage": 8.41, "elapsed_time": "0:23:34", "remaining_time": "4:16:51"}
+{"current_steps": 1350, "total_steps": 15940, "loss": 0.7017, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.82877684711229e-06, "epoch": 0.85, "percentage": 8.47, "elapsed_time": "0:23:40", "remaining_time": "4:15:55"}
+{"current_steps": 1360, "total_steps": 15940, "loss": 0.7127, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.826207459578412e-06, "epoch": 0.85, "percentage": 8.53, "elapsed_time": "0:23:48", "remaining_time": "4:15:13"}
+{"current_steps": 1370, "total_steps": 15940, "loss": 0.7013, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.823619278034073e-06, "epoch": 0.86, "percentage": 8.59, "elapsed_time": "0:23:56", "remaining_time": "4:14:33"}
+{"current_steps": 1380, "total_steps": 15940, "loss": 0.6942, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.821012312558059e-06, "epoch": 0.87, "percentage": 8.66, "elapsed_time": "0:24:04", "remaining_time": "4:13:56"}
+{"current_steps": 1390, "total_steps": 15940, "loss": 0.7013, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.818386573302305e-06, "epoch": 0.87, "percentage": 8.72, "elapsed_time": "0:24:12", "remaining_time": "4:13:20"}
+{"current_steps": 1400, "total_steps": 15940, "loss": 0.7408, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.815742070491852e-06, "epoch": 0.88, "percentage": 8.78, "elapsed_time": "0:24:18", "remaining_time": "4:12:31"}
+{"current_steps": 1400, "total_steps": 15940, "loss": null, "eval_loss": 0.7116020917892456, "predict_loss": null, "reward": null, "learning_rate": null, "epoch": 0.88, "percentage": 8.78, "elapsed_time": "0:24:18", "remaining_time": "4:12:31"}
+{"current_steps": 1410, "total_steps": 15940, "loss": 0.7105, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.81307881442481e-06, "epoch": 0.88, "percentage": 8.85, "elapsed_time": "0:25:30", "remaining_time": "4:22:52"}
+{"current_steps": 1420, "total_steps": 15940, "loss": 0.6994, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.810396815472316e-06, "epoch": 0.89, "percentage": 8.91, "elapsed_time": "0:25:37", "remaining_time": "4:21:56"}
+{"current_steps": 1430, "total_steps": 15940, "loss": 0.7459, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.807696084078494e-06, "epoch": 0.9, "percentage": 8.97, "elapsed_time": "0:25:44", "remaining_time": "4:21:16"}
+{"current_steps": 1440, "total_steps": 15940, "loss": 0.7048, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.804976630760419e-06, "epoch": 0.9, "percentage": 9.03, "elapsed_time": "0:25:51", "remaining_time": "4:20:19"}
+{"current_steps": 1450, "total_steps": 15940, "loss": 0.6975, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.802238466108068e-06, "epoch": 0.91, "percentage": 9.1, "elapsed_time": "0:25:57", "remaining_time": "4:19:24"}
+{"current_steps": 1460, "total_steps": 15940, "loss": 0.737, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.799481600784286e-06, "epoch": 0.92, "percentage": 9.16, "elapsed_time": "0:26:04", "remaining_time": "4:18:36"}
+{"current_steps": 1470, "total_steps": 15940, "loss": 0.7236, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.796706045524738e-06, "epoch": 0.92, "percentage": 9.22, "elapsed_time": "0:26:13", "remaining_time": "4:18:10"}
+{"current_steps": 1480, "total_steps": 15940, "loss": 0.6984, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.793911811137874e-06, "epoch": 0.93, "percentage": 9.28, "elapsed_time": "0:26:20", "remaining_time": "4:17:20"}
+{"current_steps": 1490, "total_steps": 15940, "loss": 0.8019, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.791098908504884e-06, "epoch": 0.93, "percentage": 9.35, "elapsed_time": "0:26:27", "remaining_time": "4:16:34"}
+{"current_steps": 1500, "total_steps": 15940, "loss": 0.7114, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.788267348579649e-06, "epoch": 0.94, "percentage": 9.41, "elapsed_time": "0:26:34", "remaining_time": "4:15:52"}
+{"current_steps": 1510, "total_steps": 15940, "loss": 0.7222, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.78541714238871e-06, "epoch": 0.95, "percentage": 9.47, "elapsed_time": "0:26:42", "remaining_time": "4:15:17"}
+{"current_steps": 1520, "total_steps": 15940, "loss": 0.7434, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.782548301031218e-06, "epoch": 0.95, "percentage": 9.54, "elapsed_time": "0:26:49", "remaining_time": "4:14:27"}
+{"current_steps": 1530, "total_steps": 15940, "loss": 0.7193, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.77966083567889e-06, "epoch": 0.96, "percentage": 9.6, "elapsed_time": "0:26:58", "remaining_time": "4:14:04"}
+{"current_steps": 1540, "total_steps": 15940, "loss": 0.7763, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.776754757575975e-06, "epoch": 0.97, "percentage": 9.66, "elapsed_time": "0:27:07", "remaining_time": "4:13:34"}
+{"current_steps": 1550, "total_steps": 15940, "loss": 0.7494, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.773830078039193e-06, "epoch": 0.97, "percentage": 9.72, "elapsed_time": "0:27:14", "remaining_time": "4:12:56"}
+{"current_steps": 1560, "total_steps": 15940, "loss": 0.7078, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.77088680845771e-06, "epoch": 0.98, "percentage": 9.79, "elapsed_time": "0:27:22", "remaining_time": "4:12:21"}
+{"current_steps": 1570, "total_steps": 15940, "loss": 0.7468, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.767924960293076e-06, "epoch": 0.98, "percentage": 9.85, "elapsed_time": "0:27:30", "remaining_time": "4:11:49"}
+{"current_steps": 1580, "total_steps": 15940, "loss": 0.7502, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.764944545079197e-06, "epoch": 0.99, "percentage": 9.91, "elapsed_time": "0:27:38", "remaining_time": "4:11:11"}
+{"current_steps": 1590, "total_steps": 15940, "loss": 0.7337, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.761945574422276e-06, "epoch": 1.0, "percentage": 9.97, "elapsed_time": "0:27:44", "remaining_time": "4:10:24"}
+{"current_steps": 1600, "total_steps": 15940, "loss": 0.701, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.758928060000779e-06, "epoch": 1.0, "percentage": 10.04, "elapsed_time": "0:27:52", "remaining_time": "4:09:48"}
+{"current_steps": 1600, "total_steps": 15940, "loss": null, "eval_loss": 0.7084596157073975, "predict_loss": null, "reward": null, "learning_rate": null, "epoch": 1.0, "percentage": 10.04, "elapsed_time": "0:27:52", "remaining_time": "4:09:48"}
+{"current_steps": 1610, "total_steps": 15940, "loss": 0.7014, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.755892013565377e-06, "epoch": 1.01, "percentage": 10.1, "elapsed_time": "0:29:04", "remaining_time": "4:18:50"}
+{"current_steps": 1620, "total_steps": 15940, "loss": 0.7256, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.752837446938915e-06, "epoch": 1.02, "percentage": 10.16, "elapsed_time": "0:29:12", "remaining_time": "4:18:07"}
+{"current_steps": 1630, "total_steps": 15940, "loss": 0.7268, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.749764372016355e-06, "epoch": 1.02, "percentage": 10.23, "elapsed_time": "0:29:19", "remaining_time": "4:17:23"}
+{"current_steps": 1640, "total_steps": 15940, "loss": 0.6968, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.746672800764734e-06, "epoch": 1.03, "percentage": 10.29, "elapsed_time": "0:29:25", "remaining_time": "4:16:37"}
+{"current_steps": 1650, "total_steps": 15940, "loss": 0.7087, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.743562745223118e-06, "epoch": 1.04, "percentage": 10.35, "elapsed_time": "0:29:32", "remaining_time": "4:15:51"}
+{"current_steps": 1660, "total_steps": 15940, "loss": 0.7199, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.740434217502549e-06, "epoch": 1.04, "percentage": 10.41, "elapsed_time": "0:29:40", "remaining_time": "4:15:17"}
+{"current_steps": 1670, "total_steps": 15940, "loss": 0.687, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.737287229786007e-06, "epoch": 1.05, "percentage": 10.48, "elapsed_time": "0:29:47", "remaining_time": "4:14:33"}
+{"current_steps": 1680, "total_steps": 15940, "loss": 0.7003, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.734121794328358e-06, "epoch": 1.05, "percentage": 10.54, "elapsed_time": "0:29:53", "remaining_time": "4:13:45"}
+{"current_steps": 1690, "total_steps": 15940, "loss": 0.7329, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.730937923456303e-06, "epoch": 1.06, "percentage": 10.6, "elapsed_time": "0:30:00", "remaining_time": "4:12:57"}
+{"current_steps": 1700, "total_steps": 15940, "loss": 0.6924, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.727735629568335e-06, "epoch": 1.07, "percentage": 10.66, "elapsed_time": "0:30:07", "remaining_time": "4:12:24"}
+{"current_steps": 1710, "total_steps": 15940, "loss": 0.7219, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.724514925134696e-06, "epoch": 1.07, "percentage": 10.73, "elapsed_time": "0:30:15", "remaining_time": "4:11:44"}
+{"current_steps": 1720, "total_steps": 15940, "loss": 0.6741, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.721275822697307e-06, "epoch": 1.08, "percentage": 10.79, "elapsed_time": "0:30:21", "remaining_time": "4:10:59"}
+{"current_steps": 1730, "total_steps": 15940, "loss": 0.7047, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.718018334869748e-06, "epoch": 1.09, "percentage": 10.85, "elapsed_time": "0:30:29", "remaining_time": "4:10:28"}
+{"current_steps": 1740, "total_steps": 15940, "loss": 0.7156, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.714742474337187e-06, "epoch": 1.09, "percentage": 10.92, "elapsed_time": "0:30:37", "remaining_time": "4:09:58"}
+{"current_steps": 1750, "total_steps": 15940, "loss": 0.6887, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.711448253856336e-06, "epoch": 1.1, "percentage": 10.98, "elapsed_time": "0:30:47", "remaining_time": "4:09:40"}
+{"current_steps": 1760, "total_steps": 15940, "loss": 0.7373, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.708135686255415e-06, "epoch": 1.1, "percentage": 11.04, "elapsed_time": "0:30:55", "remaining_time": "4:09:10"}
+{"current_steps": 1770, "total_steps": 15940, "loss": 0.6652, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.704804784434077e-06, "epoch": 1.11, "percentage": 11.1, "elapsed_time": "0:31:02", "remaining_time": "4:08:29"}
+{"current_steps": 1780, "total_steps": 15940, "loss": 0.682, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.701455561363378e-06, "epoch": 1.12, "percentage": 11.17, "elapsed_time": "0:31:09", "remaining_time": "4:07:48"}
+{"current_steps": 1790, "total_steps": 15940, "loss": 0.6844, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.698088030085721e-06, "epoch": 1.12, "percentage": 11.23, "elapsed_time": "0:31:17", "remaining_time": "4:07:20"}
+{"current_steps": 1800, "total_steps": 15940, "loss": 0.7084, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.694702203714801e-06, "epoch": 1.13, "percentage": 11.29, "elapsed_time": "0:31:23", "remaining_time": "4:06:36"}
+{"current_steps": 1800, "total_steps": 15940, "loss": null, "eval_loss": 0.705936074256897, "predict_loss": null, "reward": null, "learning_rate": null, "epoch": 1.13, "percentage": 11.29, "elapsed_time": "0:31:23", "remaining_time": "4:06:36"}
+{"current_steps": 1810, "total_steps": 15940, "loss": 0.6897, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.691298095435559e-06, "epoch": 1.14, "percentage": 11.36, "elapsed_time": "0:32:36", "remaining_time": "4:14:32"}
+{"current_steps": 1820, "total_steps": 15940, "loss": 0.6851, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.687875718504126e-06, "epoch": 1.14, "percentage": 11.42, "elapsed_time": "0:32:43", "remaining_time": "4:13:54"}
+{"current_steps": 1830, "total_steps": 15940, "loss": 0.7132, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.684435086247777e-06, "epoch": 1.15, "percentage": 11.48, "elapsed_time": "0:32:51", "remaining_time": "4:13:21"}
+{"current_steps": 1840, "total_steps": 15940, "loss": 0.7129, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.680976212064875e-06, "epoch": 1.15, "percentage": 11.54, "elapsed_time": "0:32:58", "remaining_time": "4:12:40"}
+{"current_steps": 1850, "total_steps": 15940, "loss": 0.6907, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.677499109424818e-06, "epoch": 1.16, "percentage": 11.61, "elapsed_time": "0:33:05", "remaining_time": "4:11:59"}
+{"current_steps": 1860, "total_steps": 15940, "loss": 0.7327, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.674003791867993e-06, "epoch": 1.17, "percentage": 11.67, "elapsed_time": "0:33:12", "remaining_time": "4:11:22"}
+{"current_steps": 1870, "total_steps": 15940, "loss": 0.7304, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.670490273005713e-06, "epoch": 1.17, "percentage": 11.73, "elapsed_time": "0:33:20", "remaining_time": "4:10:50"}
+{"current_steps": 1880, "total_steps": 15940, "loss": 0.7076, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.666958566520175e-06, "epoch": 1.18, "percentage": 11.79, "elapsed_time": "0:33:27", "remaining_time": "4:10:12"}
+{"current_steps": 1890, "total_steps": 15940, "loss": 0.691, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.663408686164399e-06, "epoch": 1.19, "percentage": 11.86, "elapsed_time": "0:33:33", "remaining_time": "4:09:30"}
+{"current_steps": 1900, "total_steps": 15940, "loss": 0.74, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.659840645762176e-06, "epoch": 1.19, "percentage": 11.92, "elapsed_time": "0:33:40", "remaining_time": "4:08:51"}
+{"current_steps": 1910, "total_steps": 15940, "loss": 0.7295, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.656254459208015e-06, "epoch": 1.2, "percentage": 11.98, "elapsed_time": "0:33:49", "remaining_time": "4:08:25"}
+{"current_steps": 1920, "total_steps": 15940, "loss": 0.651, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.652650140467094e-06, "epoch": 1.2, "percentage": 12.05, "elapsed_time": "0:33:58", "remaining_time": "4:08:05"}
+{"current_steps": 1930, "total_steps": 15940, "loss": 0.7028, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.649027703575193e-06, "epoch": 1.21, "percentage": 12.11, "elapsed_time": "0:34:06", "remaining_time": "4:07:32"}
+{"current_steps": 1940, "total_steps": 15940, "loss": 0.7179, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.645387162638652e-06, "epoch": 1.22, "percentage": 12.17, "elapsed_time": "0:34:15", "remaining_time": "4:07:10"}
+{"current_steps": 1950, "total_steps": 15940, "loss": 0.6872, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.641728531834313e-06, "epoch": 1.22, "percentage": 12.23, "elapsed_time": "0:34:21", "remaining_time": "4:06:28"}
+{"current_steps": 1960, "total_steps": 15940, "loss": 0.6991, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.638051825409454e-06, "epoch": 1.23, "percentage": 12.3, "elapsed_time": "0:34:28", "remaining_time": "4:05:51"}
+{"current_steps": 1970, "total_steps": 15940, "loss": 0.7183, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.634357057681749e-06, "epoch": 1.24, "percentage": 12.36, "elapsed_time": "0:34:35", "remaining_time": "4:05:17"}
+{"current_steps": 1980, "total_steps": 15940, "loss": 0.6795, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.630644243039207e-06, "epoch": 1.24, "percentage": 12.42, "elapsed_time": "0:34:42", "remaining_time": "4:04:41"}
+{"current_steps": 1990, "total_steps": 15940, "loss": 0.7075, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.62691339594011e-06, "epoch": 1.25, "percentage": 12.48, "elapsed_time": "0:34:48", "remaining_time": "4:04:03"}
+{"current_steps": 2000, "total_steps": 15940, "loss": 0.6999, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.623164530912963e-06, "epoch": 1.25, "percentage": 12.55, "elapsed_time": "0:34:56", "remaining_time": "4:03:33"}
+{"current_steps": 2000, "total_steps": 15940, "loss": null, "eval_loss": 0.7040402293205261, "predict_loss": null, "reward": null, "learning_rate": null, "epoch": 1.25, "percentage": 12.55, "elapsed_time": "0:34:56", "remaining_time": "4:03:33"}
+{"current_steps": 2010, "total_steps": 15940, "loss": 0.6947, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.619397662556434e-06, "epoch": 1.26, "percentage": 12.61, "elapsed_time": "0:36:08", "remaining_time": "4:10:30"}
+{"current_steps": 2020, "total_steps": 15940, "loss": 0.7102, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.615612805539305e-06, "epoch": 1.27, "percentage": 12.67, "elapsed_time": "0:36:15", "remaining_time": "4:09:51"}
+{"current_steps": 2030, "total_steps": 15940, "loss": 0.7068, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.6118099746004e-06, "epoch": 1.27, "percentage": 12.74, "elapsed_time": "0:36:25", "remaining_time": "4:09:32"}
+{"current_steps": 2040, "total_steps": 15940, "loss": 0.6528, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.607989184548544e-06, "epoch": 1.28, "percentage": 12.8, "elapsed_time": "0:36:31", "remaining_time": "4:08:51"}
+{"current_steps": 2050, "total_steps": 15940, "loss": 0.6838, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.604150450262488e-06, "epoch": 1.29, "percentage": 12.86, "elapsed_time": "0:36:38", "remaining_time": "4:08:13"}
+{"current_steps": 2060, "total_steps": 15940, "loss": 0.6908, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.600293786690873e-06, "epoch": 1.29, "percentage": 12.92, "elapsed_time": "0:36:45", "remaining_time": "4:07:38"}
+{"current_steps": 2070, "total_steps": 15940, "loss": 0.7153, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.596419208852152e-06, "epoch": 1.3, "percentage": 12.99, "elapsed_time": "0:36:52", "remaining_time": "4:07:01"}
+{"current_steps": 2080, "total_steps": 15940, "loss": 0.67, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.592526731834536e-06, "epoch": 1.3, "percentage": 13.05, "elapsed_time": "0:36:58", "remaining_time": "4:06:21"}
+{"current_steps": 2090, "total_steps": 15940, "loss": 0.705, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.588616370795947e-06, "epoch": 1.31, "percentage": 13.11, "elapsed_time": "0:37:05", "remaining_time": "4:05:50"}
+{"current_steps": 2100, "total_steps": 15940, "loss": 0.7037, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.584688140963945e-06, "epoch": 1.32, "percentage": 13.17, "elapsed_time": "0:37:13", "remaining_time": "4:05:22"}
+{"current_steps": 2110, "total_steps": 15940, "loss": 0.7199, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.580742057635672e-06, "epoch": 1.32, "percentage": 13.24, "elapsed_time": "0:37:22", "remaining_time": "4:04:56"}
+{"current_steps": 2120, "total_steps": 15940, "loss": 0.7098, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.576778136177798e-06, "epoch": 1.33, "percentage": 13.3, "elapsed_time": "0:37:30", "remaining_time": "4:04:31"}
+{"current_steps": 2130, "total_steps": 15940, "loss": 0.7109, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.572796392026455e-06, "epoch": 1.34, "percentage": 13.36, "elapsed_time": "0:37:39", "remaining_time": "4:04:06"}
+{"current_steps": 2140, "total_steps": 15940, "loss": 0.693, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.568796840687184e-06, "epoch": 1.34, "percentage": 13.43, "elapsed_time": "0:37:45", "remaining_time": "4:03:30"}
+{"current_steps": 2150, "total_steps": 15940, "loss": 0.6679, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.564779497734864e-06, "epoch": 1.35, "percentage": 13.49, "elapsed_time": "0:37:53", "remaining_time": "4:03:00"}
+{"current_steps": 2160, "total_steps": 15940, "loss": 0.6573, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.56074437881366e-06, "epoch": 1.36, "percentage": 13.55, "elapsed_time": "0:37:59", "remaining_time": "4:02:24"}
+{"current_steps": 2170, "total_steps": 15940, "loss": 0.705, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.55669149963696e-06, "epoch": 1.36, "percentage": 13.61, "elapsed_time": "0:38:07", "remaining_time": "4:01:57"}
+{"current_steps": 2180, "total_steps": 15940, "loss": 0.6932, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.552620875987312e-06, "epoch": 1.37, "percentage": 13.68, "elapsed_time": "0:38:14", "remaining_time": "4:01:22"}
+{"current_steps": 2190, "total_steps": 15940, "loss": 0.6616, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.548532523716366e-06, "epoch": 1.37, "percentage": 13.74, "elapsed_time": "0:38:20", "remaining_time": "4:00:44"}
+{"current_steps": 2200, "total_steps": 15940, "loss": 0.7182, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.544426458744805e-06, "epoch": 1.38, "percentage": 13.8, "elapsed_time": "0:38:26", "remaining_time": "4:00:07"}
+{"current_steps": 2200, "total_steps": 15940, "loss": null, "eval_loss": 0.7022137641906738, "predict_loss": null, "reward": null, "learning_rate": null, "epoch": 1.38, "percentage": 13.8, "elapsed_time": "0:38:26", "remaining_time": "4:00:07"}
+{"current_steps": 2210, "total_steps": 15940, "loss": 0.6878, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.540302697062294e-06, "epoch": 1.39, "percentage": 13.86, "elapsed_time": "0:39:39", "remaining_time": "4:06:22"}
+{"current_steps": 2220, "total_steps": 15940, "loss": 0.6979, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.536161254727407e-06, "epoch": 1.39, "percentage": 13.93, "elapsed_time": "0:39:47", "remaining_time": "4:05:55"}
+{"current_steps": 2230, "total_steps": 15940, "loss": 0.6749, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.532002147867575e-06, "epoch": 1.4, "percentage": 13.99, "elapsed_time": "0:39:55", "remaining_time": "4:05:27"}
+{"current_steps": 2240, "total_steps": 15940, "loss": 0.6987, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.527825392679012e-06, "epoch": 1.41, "percentage": 14.05, "elapsed_time": "0:40:06", "remaining_time": "4:05:19"}
+{"current_steps": 2250, "total_steps": 15940, "loss": 0.6888, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.523631005426658e-06, "epoch": 1.41, "percentage": 14.12, "elapsed_time": "0:40:13", "remaining_time": "4:04:46"}
+{"current_steps": 2260, "total_steps": 15940, "loss": 0.6471, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.51941900244412e-06, "epoch": 1.42, "percentage": 14.18, "elapsed_time": "0:40:19", "remaining_time": "4:04:07"}
+{"current_steps": 2270, "total_steps": 15940, "loss": 0.6689, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.515189400133594e-06, "epoch": 1.42, "percentage": 14.24, "elapsed_time": "0:40:27", "remaining_time": "4:03:38"}
+{"current_steps": 2280, "total_steps": 15940, "loss": 0.7001, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.510942214965819e-06, "epoch": 1.43, "percentage": 14.3, "elapsed_time": "0:40:35", "remaining_time": "4:03:13"}
+{"current_steps": 2290, "total_steps": 15940, "loss": 0.6999, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.506677463480003e-06, "epoch": 1.44, "percentage": 14.37, "elapsed_time": "0:40:43", "remaining_time": "4:02:47"}
+{"current_steps": 2300, "total_steps": 15940, "loss": 0.7008, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.50239516228376e-06, "epoch": 1.44, "percentage": 14.43, "elapsed_time": "0:40:50", "remaining_time": "4:02:11"}
+{"current_steps": 2310, "total_steps": 15940, "loss": 0.7122, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.49809532805304e-06, "epoch": 1.45, "percentage": 14.49, "elapsed_time": "0:40:59", "remaining_time": "4:01:49"}
+{"current_steps": 2320, "total_steps": 15940, "loss": 0.7106, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.493777977532072e-06, "epoch": 1.46, "percentage": 14.55, "elapsed_time": "0:41:06", "remaining_time": "4:01:20"}
+{"current_steps": 2330, "total_steps": 15940, "loss": 0.6739, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.489443127533304e-06, "epoch": 1.46, "percentage": 14.62, "elapsed_time": "0:41:12", "remaining_time": "4:00:44"}
+{"current_steps": 2340, "total_steps": 15940, "loss": 0.6888, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.485090794937319e-06, "epoch": 1.47, "percentage": 14.68, "elapsed_time": "0:41:22", "remaining_time": "4:00:25"}
+{"current_steps": 2350, "total_steps": 15940, "loss": 0.6986, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.480720996692783e-06, "epoch": 1.47, "percentage": 14.74, "elapsed_time": "0:41:28", "remaining_time": "3:59:50"}
+{"current_steps": 2360, "total_steps": 15940, "loss": 0.7314, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.476333749816382e-06, "epoch": 1.48, "percentage": 14.81, "elapsed_time": "0:41:36", "remaining_time": "3:59:24"}
+{"current_steps": 2370, "total_steps": 15940, "loss": 0.6602, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.47192907139274e-06, "epoch": 1.49, "percentage": 14.87, "elapsed_time": "0:41:42", "remaining_time": "3:58:49"}
+{"current_steps": 2380, "total_steps": 15940, "loss": 0.6454, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.46750697857437e-06, "epoch": 1.49, "percentage": 14.93, "elapsed_time": "0:41:51", "remaining_time": "3:58:27"}
+{"current_steps": 2390, "total_steps": 15940, "loss": 0.6499, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.463067488581598e-06, "epoch": 1.5, "percentage": 14.99, "elapsed_time": "0:41:58", "remaining_time": "3:57:59"}
+{"current_steps": 2400, "total_steps": 15940, "loss": 0.7267, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.45861061870249e-06, "epoch": 1.51, "percentage": 15.06, "elapsed_time": "0:42:06", "remaining_time": "3:57:35"}
+{"current_steps": 2400, "total_steps": 15940, "loss": null, "eval_loss": 0.6993948817253113, "predict_loss": null, "reward": null, "learning_rate": null, "epoch": 1.51, "percentage": 15.06, "elapsed_time": "0:42:06", "remaining_time": "3:57:35"}
+{"current_steps": 2410, "total_steps": 15940, "loss": 0.6934, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.454136386292804e-06, "epoch": 1.51, "percentage": 15.12, "elapsed_time": "0:43:18", "remaining_time": "4:03:10"}
+{"current_steps": 2420, "total_steps": 15940, "loss": 0.7095, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.449644808775902e-06, "epoch": 1.52, "percentage": 15.18, "elapsed_time": "0:43:26", "remaining_time": "4:02:40"}
+{"current_steps": 2430, "total_steps": 15940, "loss": 0.6626, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.445135903642693e-06, "epoch": 1.52, "percentage": 15.24, "elapsed_time": "0:43:32", "remaining_time": "4:02:05"}
+{"current_steps": 2440, "total_steps": 15940, "loss": 0.6513, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.440609688451561e-06, "epoch": 1.53, "percentage": 15.31, "elapsed_time": "0:43:40", "remaining_time": "4:01:39"}
+{"current_steps": 2450, "total_steps": 15940, "loss": 0.711, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.4360661808283e-06, "epoch": 1.54, "percentage": 15.37, "elapsed_time": "0:43:47", "remaining_time": "4:01:06"}
+{"current_steps": 2460, "total_steps": 15940, "loss": 0.6991, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.431505398466045e-06, "epoch": 1.54, "percentage": 15.43, "elapsed_time": "0:43:53", "remaining_time": "4:00:32"}
+{"current_steps": 2470, "total_steps": 15940, "loss": 0.7073, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.426927359125195e-06, "epoch": 1.55, "percentage": 15.5, "elapsed_time": "0:44:01", "remaining_time": "4:00:07"}
+{"current_steps": 2480, "total_steps": 15940, "loss": 0.6557, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.422332080633361e-06, "epoch": 1.56, "percentage": 15.56, "elapsed_time": "0:44:08", "remaining_time": "3:59:33"}
+{"current_steps": 2490, "total_steps": 15940, "loss": 0.6786, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.417719580885275e-06, "epoch": 1.56, "percentage": 15.62, "elapsed_time": "0:44:15", "remaining_time": "3:59:06"}
+{"current_steps": 2500, "total_steps": 15940, "loss": 0.6159, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.413089877842735e-06, "epoch": 1.57, "percentage": 15.68, "elapsed_time": "0:44:23", "remaining_time": "3:58:40"}
+{"current_steps": 2510, "total_steps": 15940, "loss": 0.7341, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.408442989534536e-06, "epoch": 1.57, "percentage": 15.75, "elapsed_time": "0:44:30", "remaining_time": "3:58:10"}
+{"current_steps": 2520, "total_steps": 15940, "loss": 0.6737, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.403778934056392e-06, "epoch": 1.58, "percentage": 15.81, "elapsed_time": "0:44:40", "remaining_time": "3:57:56"}
+{"current_steps": 2530, "total_steps": 15940, "loss": 0.6832, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.399097729570865e-06, "epoch": 1.59, "percentage": 15.87, "elapsed_time": "0:44:46", "remaining_time": "3:57:21"}
+{"current_steps": 2540, "total_steps": 15940, "loss": 0.6691, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.394399394307303e-06, "epoch": 1.59, "percentage": 15.93, "elapsed_time": "0:44:54", "remaining_time": "3:56:56"}
+{"current_steps": 2550, "total_steps": 15940, "loss": 0.7072, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.38968394656176e-06, "epoch": 1.6, "percentage": 16.0, "elapsed_time": "0:45:04", "remaining_time": "3:56:38"}
+{"current_steps": 2560, "total_steps": 15940, "loss": 0.7068, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.384951404696933e-06, "epoch": 1.61, "percentage": 16.06, "elapsed_time": "0:45:11", "remaining_time": "3:56:13"}
+{"current_steps": 2570, "total_steps": 15940, "loss": 0.6476, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.380201787142085e-06, "epoch": 1.61, "percentage": 16.12, "elapsed_time": "0:45:17", "remaining_time": "3:55:39"}
+{"current_steps": 2580, "total_steps": 15940, "loss": 0.6805, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.37543511239297e-06, "epoch": 1.62, "percentage": 16.19, "elapsed_time": "0:45:24", "remaining_time": "3:55:06"}
+{"current_steps": 2590, "total_steps": 15940, "loss": 0.6887, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.370651399011769e-06, "epoch": 1.62, "percentage": 16.25, "elapsed_time": "0:45:30", "remaining_time": "3:54:34"}
+{"current_steps": 2600, "total_steps": 15940, "loss": 0.6912, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.365850665627016e-06, "epoch": 1.63, "percentage": 16.31, "elapsed_time": "0:45:37", "remaining_time": "3:54:06"}
+{"current_steps": 2600, "total_steps": 15940, "loss": null, "eval_loss": 0.6971801519393921, "predict_loss": null, "reward": null, "learning_rate": null, "epoch": 1.63, "percentage": 16.31, "elapsed_time": "0:45:37", "remaining_time": "3:54:06"}
+{"current_steps": 2610, "total_steps": 15940, "loss": 0.6479, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.36103293093352e-06, "epoch": 1.64, "percentage": 16.37, "elapsed_time": "0:46:51", "remaining_time": "3:59:17"}
+{"current_steps": 2620, "total_steps": 15940, "loss": 0.6788, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.356198213692297e-06, "epoch": 1.64, "percentage": 16.44, "elapsed_time": "0:46:58", "remaining_time": "3:58:48"}
+{"current_steps": 2630, "total_steps": 15940, "loss": 0.6481, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.351346532730499e-06, "epoch": 1.65, "percentage": 16.5, "elapsed_time": "0:47:05", "remaining_time": "3:58:17"}
+{"current_steps": 2640, "total_steps": 15940, "loss": 0.6893, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.346477906941331e-06, "epoch": 1.66, "percentage": 16.56, "elapsed_time": "0:47:11", "remaining_time": "3:57:45"}
+{"current_steps": 2650, "total_steps": 15940, "loss": 0.6784, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.341592355283986e-06, "epoch": 1.66, "percentage": 16.62, "elapsed_time": "0:47:17", "remaining_time": "3:57:12"}
+{"current_steps": 2660, "total_steps": 15940, "loss": 0.6834, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.336689896783575e-06, "epoch": 1.67, "percentage": 16.69, "elapsed_time": "0:47:24", "remaining_time": "3:56:42"}
+{"current_steps": 2670, "total_steps": 15940, "loss": 0.6701, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.331770550531037e-06, "epoch": 1.68, "percentage": 16.75, "elapsed_time": "0:47:32", "remaining_time": "3:56:19"}
+{"current_steps": 2680, "total_steps": 15940, "loss": 0.6691, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.32683433568308e-06, "epoch": 1.68, "percentage": 16.81, "elapsed_time": "0:47:40", "remaining_time": "3:55:51"}
+{"current_steps": 2690, "total_steps": 15940, "loss": 0.6818, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.321881271462104e-06, "epoch": 1.69, "percentage": 16.88, "elapsed_time": "0:47:47", "remaining_time": "3:55:26"}
+{"current_steps": 2700, "total_steps": 15940, "loss": 0.6852, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.316911377156116e-06, "epoch": 1.69, "percentage": 16.94, "elapsed_time": "0:47:55", "remaining_time": "3:55:01"}
+{"current_steps": 2710, "total_steps": 15940, "loss": 0.6653, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.31192467211867e-06, "epoch": 1.7, "percentage": 17.0, "elapsed_time": "0:48:03", "remaining_time": "3:54:38"}
+{"current_steps": 2720, "total_steps": 15940, "loss": 0.6671, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.306921175768776e-06, "epoch": 1.71, "percentage": 17.06, "elapsed_time": "0:48:10", "remaining_time": "3:54:08"}
+{"current_steps": 2730, "total_steps": 15940, "loss": 0.7066, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.301900907590836e-06, "epoch": 1.71, "percentage": 17.13, "elapsed_time": "0:48:16", "remaining_time": "3:53:36"}
+{"current_steps": 2740, "total_steps": 15940, "loss": 0.7326, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.296863887134561e-06, "epoch": 1.72, "percentage": 17.19, "elapsed_time": "0:48:23", "remaining_time": "3:53:05"}
+{"current_steps": 2750, "total_steps": 15940, "loss": 0.6758, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.291810134014904e-06, "epoch": 1.73, "percentage": 17.25, "elapsed_time": "0:48:33", "remaining_time": "3:52:56"}
+{"current_steps": 2760, "total_steps": 15940, "loss": 0.6645, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.286739667911973e-06, "epoch": 1.73, "percentage": 17.31, "elapsed_time": "0:48:42", "remaining_time": "3:52:36"}
+{"current_steps": 2770, "total_steps": 15940, "loss": 0.6968, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.281652508570957e-06, "epoch": 1.74, "percentage": 17.38, "elapsed_time": "0:48:48", "remaining_time": "3:52:05"}
+{"current_steps": 2780, "total_steps": 15940, "loss": 0.6718, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.27654867580206e-06, "epoch": 1.74, "percentage": 17.44, "elapsed_time": "0:48:55", "remaining_time": "3:51:36"}
+{"current_steps": 2790, "total_steps": 15940, "loss": 0.6915, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.271428189480405e-06, "epoch": 1.75, "percentage": 17.5, "elapsed_time": "0:49:02", "remaining_time": "3:51:10"}
+{"current_steps": 2800, "total_steps": 15940, "loss": 0.6821, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.266291069545972e-06, "epoch": 1.76, "percentage": 17.57, "elapsed_time": "0:49:09", "remaining_time": "3:50:43"}
+{"current_steps": 2800, "total_steps": 15940, "loss": null, "eval_loss": 0.6953641176223755, "predict_loss": null, "reward": null, "learning_rate": null, "epoch": 1.76, "percentage": 17.57, "elapsed_time": "0:49:09", "remaining_time": "3:50:43"}
+{"current_steps": 2810, "total_steps": 15940, "loss": 0.666, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.261137336003511e-06, "epoch": 1.76, "percentage": 17.63, "elapsed_time": "0:50:21", "remaining_time": "3:55:18"}
+{"current_steps": 2820, "total_steps": 15940, "loss": 0.6414, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.255967008922475e-06, "epoch": 1.77, "percentage": 17.69, "elapsed_time": "0:50:29", "remaining_time": "3:54:56"}
+{"current_steps": 2830, "total_steps": 15940, "loss": 0.7321, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.250780108436926e-06, "epoch": 1.78, "percentage": 17.75, "elapsed_time": "0:50:36", "remaining_time": "3:54:27"}
+{"current_steps": 2840, "total_steps": 15940, "loss": 0.735, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.245576654745471e-06, "epoch": 1.78, "percentage": 17.82, "elapsed_time": "0:50:45", "remaining_time": "3:54:06"}
+{"current_steps": 2850, "total_steps": 15940, "loss": 0.6809, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.24035666811118e-06, "epoch": 1.79, "percentage": 17.88, "elapsed_time": "0:50:52", "remaining_time": "3:53:38"}
+{"current_steps": 2860, "total_steps": 15940, "loss": 0.6378, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.235120168861495e-06, "epoch": 1.79, "percentage": 17.94, "elapsed_time": "0:50:58", "remaining_time": "3:53:08"}
+{"current_steps": 2870, "total_steps": 15940, "loss": 0.6648, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.229867177388172e-06, "epoch": 1.8, "percentage": 18.01, "elapsed_time": "0:51:04", "remaining_time": "3:52:38"}
+{"current_steps": 2880, "total_steps": 15940, "loss": 0.6681, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.224597714147186e-06, "epoch": 1.81, "percentage": 18.07, "elapsed_time": "0:51:12", "remaining_time": "3:52:12"}
+{"current_steps": 2890, "total_steps": 15940, "loss": 0.6752, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.219311799658652e-06, "epoch": 1.81, "percentage": 18.13, "elapsed_time": "0:51:20", "remaining_time": "3:51:49"}
+{"current_steps": 2900, "total_steps": 15940, "loss": 0.6427, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.214009454506754e-06, "epoch": 1.82, "percentage": 18.19, "elapsed_time": "0:51:27", "remaining_time": "3:51:23"}
+{"current_steps": 2910, "total_steps": 15940, "loss": 0.6763, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.208690699339656e-06, "epoch": 1.83, "percentage": 18.26, "elapsed_time": "0:51:34", "remaining_time": "3:50:57"}
+{"current_steps": 2920, "total_steps": 15940, "loss": 0.6935, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.203355554869428e-06, "epoch": 1.83, "percentage": 18.32, "elapsed_time": "0:51:43", "remaining_time": "3:50:38"}
+{"current_steps": 2930, "total_steps": 15940, "loss": 0.7012, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.198004041871962e-06, "epoch": 1.84, "percentage": 18.38, "elapsed_time": "0:51:51", "remaining_time": "3:50:13"}
+{"current_steps": 2940, "total_steps": 15940, "loss": 0.6713, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.192636181186887e-06, "epoch": 1.84, "percentage": 18.44, "elapsed_time": "0:51:59", "remaining_time": "3:49:52"}
+{"current_steps": 2950, "total_steps": 15940, "loss": 0.6344, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.1872519937175e-06, "epoch": 1.85, "percentage": 18.51, "elapsed_time": "0:52:06", "remaining_time": "3:49:28"}
+{"current_steps": 2960, "total_steps": 15940, "loss": 0.6699, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.181851500430672e-06, "epoch": 1.86, "percentage": 18.57, "elapsed_time": "0:52:15", "remaining_time": "3:49:07"}
+{"current_steps": 2970, "total_steps": 15940, "loss": 0.7029, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.176434722356772e-06, "epoch": 1.86, "percentage": 18.63, "elapsed_time": "0:52:22", "remaining_time": "3:48:42"}
+{"current_steps": 2980, "total_steps": 15940, "loss": 0.6491, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.17100168058959e-06, "epoch": 1.87, "percentage": 18.7, "elapsed_time": "0:52:30", "remaining_time": "3:48:23"}
+{"current_steps": 2990, "total_steps": 15940, "loss": 0.6722, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.165552396286236e-06, "epoch": 1.88, "percentage": 18.76, "elapsed_time": "0:52:38", "remaining_time": "3:47:59"}
+{"current_steps": 3000, "total_steps": 15940, "loss": 0.7104, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.160086890667086e-06, "epoch": 1.88, "percentage": 18.82, "elapsed_time": "0:52:46", "remaining_time": "3:47:36"}
+{"current_steps": 3000, "total_steps": 15940, "loss": null, "eval_loss": 0.6943792104721069, "predict_loss": null, "reward": null, "learning_rate": null, "epoch": 1.88, "percentage": 18.82, "elapsed_time": "0:52:46", "remaining_time": "3:47:36"}
+{"current_steps": 3010, "total_steps": 15940, "loss": 0.7042, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.154605185015678e-06, "epoch": 1.89, "percentage": 18.88, "elapsed_time": "0:53:58", "remaining_time": "3:51:51"}
+{"current_steps": 3020, "total_steps": 15940, "loss": 0.6208, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.14910730067863e-06, "epoch": 1.89, "percentage": 18.95, "elapsed_time": "0:54:04", "remaining_time": "3:51:22"}
+{"current_steps": 3030, "total_steps": 15940, "loss": 0.6721, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.143593259065573e-06, "epoch": 1.9, "percentage": 19.01, "elapsed_time": "0:54:12", "remaining_time": "3:50:58"}
+{"current_steps": 3040, "total_steps": 15940, "loss": 0.6328, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.138063081649052e-06, "epoch": 1.91, "percentage": 19.07, "elapsed_time": "0:54:19", "remaining_time": "3:50:31"}
+{"current_steps": 3050, "total_steps": 15940, "loss": 0.6564, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.132516789964443e-06, "epoch": 1.91, "percentage": 19.13, "elapsed_time": "0:54:28", "remaining_time": "3:50:11"}
+{"current_steps": 3060, "total_steps": 15940, "loss": 0.6782, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.126954405609882e-06, "epoch": 1.92, "percentage": 19.2, "elapsed_time": "0:54:35", "remaining_time": "3:49:46"}
+{"current_steps": 3070, "total_steps": 15940, "loss": 0.6686, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.121375950246165e-06, "epoch": 1.93, "percentage": 19.26, "elapsed_time": "0:54:43", "remaining_time": "3:49:24"}
+{"current_steps": 3080, "total_steps": 15940, "loss": 0.6445, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.115781445596676e-06, "epoch": 1.93, "percentage": 19.32, "elapsed_time": "0:54:50", "remaining_time": "3:48:58"}
+{"current_steps": 3090, "total_steps": 15940, "loss": 0.6306, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.110170913447294e-06, "epoch": 1.94, "percentage": 19.39, "elapsed_time": "0:55:00", "remaining_time": "3:48:45"}
+{"current_steps": 3100, "total_steps": 15940, "loss": 0.6465, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.104544375646314e-06, "epoch": 1.94, "percentage": 19.45, "elapsed_time": "0:55:07", "remaining_time": "3:48:18"}
+{"current_steps": 3110, "total_steps": 15940, "loss": 0.6985, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.098901854104359e-06, "epoch": 1.95, "percentage": 19.51, "elapsed_time": "0:55:15", "remaining_time": "3:47:57"}
+{"current_steps": 3120, "total_steps": 15940, "loss": 0.7272, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.09324337079429e-06, "epoch": 1.96, "percentage": 19.57, "elapsed_time": "0:55:21", "remaining_time": "3:47:27"}
+{"current_steps": 3130, "total_steps": 15940, "loss": 0.6632, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.08756894775114e-06, "epoch": 1.96, "percentage": 19.64, "elapsed_time": "0:55:29", "remaining_time": "3:47:07"}
+{"current_steps": 3140, "total_steps": 15940, "loss": 0.6996, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.081878607071996e-06, "epoch": 1.97, "percentage": 19.7, "elapsed_time": "0:55:36", "remaining_time": "3:46:41"}
+{"current_steps": 3150, "total_steps": 15940, "loss": 0.734, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.076172370915944e-06, "epoch": 1.98, "percentage": 19.76, "elapsed_time": "0:55:43", "remaining_time": "3:46:13"}
+{"current_steps": 3160, "total_steps": 15940, "loss": 0.6578, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.07045026150396e-06, "epoch": 1.98, "percentage": 19.82, "elapsed_time": "0:55:51", "remaining_time": "3:45:52"}
+{"current_steps": 3170, "total_steps": 15940, "loss": 0.6527, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.064712301118842e-06, "epoch": 1.99, "percentage": 19.89, "elapsed_time": "0:56:02", "remaining_time": "3:45:45"}
+{"current_steps": 3180, "total_steps": 15940, "loss": 0.6487, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.058958512105104e-06, "epoch": 1.99, "percentage": 19.95, "elapsed_time": "0:56:11", "remaining_time": "3:45:26"}
+{"current_steps": 3190, "total_steps": 15940, "loss": 0.7011, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.053188916868912e-06, "epoch": 2.0, "percentage": 20.01, "elapsed_time": "0:56:18", "remaining_time": "3:45:05"}
+{"current_steps": 3200, "total_steps": 15940, "loss": 0.6222, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.04740353787797e-06, "epoch": 2.01, "percentage": 20.08, "elapsed_time": "0:56:27", "remaining_time": "3:44:44"}
+{"current_steps": 3200, "total_steps": 15940, "loss": null, "eval_loss": 0.693417489528656, "predict_loss": null, "reward": null, "learning_rate": null, "epoch": 2.01, "percentage": 20.08, "elapsed_time": "0:56:27", "remaining_time": "3:44:44"}
+{"current_steps": 3210, "total_steps": 15940, "loss": 0.6396, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.041602397661459e-06, "epoch": 2.01, "percentage": 20.14, "elapsed_time": "0:57:40", "remaining_time": "3:48:43"}
+{"current_steps": 3220, "total_steps": 15940, "loss": 0.6582, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.035785518809928e-06, "epoch": 2.02, "percentage": 20.2, "elapsed_time": "0:57:47", "remaining_time": "3:48:16"}
+{"current_steps": 3230, "total_steps": 15940, "loss": 0.6517, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.029952923975217e-06, "epoch": 2.03, "percentage": 20.26, "elapsed_time": "0:57:53", "remaining_time": "3:47:48"}
+{"current_steps": 3240, "total_steps": 15940, "loss": 0.6465, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.024104635870368e-06, "epoch": 2.03, "percentage": 20.33, "elapsed_time": "0:58:00", "remaining_time": "3:47:23"}
+{"current_steps": 3250, "total_steps": 15940, "loss": 0.6215, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.018240677269532e-06, "epoch": 2.04, "percentage": 20.39, "elapsed_time": "0:58:06", "remaining_time": "3:46:55"}
+{"current_steps": 3260, "total_steps": 15940, "loss": 0.6609, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.012361071007892e-06, "epoch": 2.05, "percentage": 20.45, "elapsed_time": "0:58:13", "remaining_time": "3:46:28"}
+{"current_steps": 3270, "total_steps": 15940, "loss": 0.6608, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.00646583998155e-06, "epoch": 2.05, "percentage": 20.51, "elapsed_time": "0:58:21", "remaining_time": "3:46:07"}
+{"current_steps": 3280, "total_steps": 15940, "loss": 0.6222, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.000555007147469e-06, "epoch": 2.06, "percentage": 20.58, "elapsed_time": "0:58:28", "remaining_time": "3:45:43"}
+{"current_steps": 3290, "total_steps": 15940, "loss": 0.6363, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 8.994628595523358e-06, "epoch": 2.06, "percentage": 20.64, "elapsed_time": "0:58:36", "remaining_time": "3:45:19"}
+{"current_steps": 3300, "total_steps": 15940, "loss": 0.6364, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 8.988686628187597e-06, "epoch": 2.07, "percentage": 20.7, "elapsed_time": "0:58:45", "remaining_time": "3:45:02"}
+{"current_steps": 3310, "total_steps": 15940, "loss": 0.6211, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 8.98272912827914e-06, "epoch": 2.08, "percentage": 20.77, "elapsed_time": "0:58:51", "remaining_time": "3:44:36"}
+{"current_steps": 3320, "total_steps": 15940, "loss": 0.6326, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 8.97675611899743e-06, "epoch": 2.08, "percentage": 20.83, "elapsed_time": "0:58:58", "remaining_time": "3:44:09"}
+{"current_steps": 3330, "total_steps": 15940, "loss": 0.7006, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 8.970767623602299e-06, "epoch": 2.09, "percentage": 20.89, "elapsed_time": "0:59:06", "remaining_time": "3:43:48"}
+{"current_steps": 3340, "total_steps": 15940, "loss": 0.6316, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 8.964763665413894e-06, "epoch": 2.1, "percentage": 20.95, "elapsed_time": "0:59:12", "remaining_time": "3:43:21"}
+{"current_steps": 3350, "total_steps": 15940, "loss": 0.6318, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 8.95874426781257e-06, "epoch": 2.1, "percentage": 21.02, "elapsed_time": "0:59:19", "remaining_time": "3:42:57"}
+{"current_steps": 3360, "total_steps": 15940, "loss": 0.6067, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 8.952709454238809e-06, "epoch": 2.11, "percentage": 21.08, "elapsed_time": "0:59:27", "remaining_time": "3:42:38"}
+{"current_steps": 3370, "total_steps": 15940, "loss": 0.6289, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 8.946659248193122e-06, "epoch": 2.11, "percentage": 21.14, "elapsed_time": "0:59:35", "remaining_time": "3:42:18"}
+{"current_steps": 3380, "total_steps": 15940, "loss": 0.6537, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 8.940593673235962e-06, "epoch": 2.12, "percentage": 21.2, "elapsed_time": "0:59:42", "remaining_time": "3:41:51"}
+{"current_steps": 3390, "total_steps": 15940, "loss": 0.6986, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 8.934512752987635e-06, "epoch": 2.13, "percentage": 21.27, "elapsed_time": "0:59:50", "remaining_time": "3:41:31"}
+{"current_steps": 3400, "total_steps": 15940, "loss": 0.6383, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 8.928416511128194e-06, "epoch": 2.13, "percentage": 21.33, "elapsed_time": "0:59:58", "remaining_time": "3:41:12"}
+{"current_steps": 3400, "total_steps": 15940, "loss": null, "eval_loss": 0.6974382996559143, "predict_loss": null, "reward": null, "learning_rate": null, "epoch": 2.13, "percentage": 21.33, "elapsed_time": "0:59:58", "remaining_time": "3:41:12"}
+{"current_steps": 3410, "total_steps": 15940, "loss": 0.6447, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 8.922304971397369e-06, "epoch": 2.14, "percentage": 21.39, "elapsed_time": "1:01:11", "remaining_time": "3:44:51"}
+{"current_steps": 3420, "total_steps": 15940, "loss": 0.6441, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 8.916178157594453e-06, "epoch": 2.15, "percentage": 21.46, "elapsed_time": "1:01:18", "remaining_time": "3:44:26"}
+{"current_steps": 3430, "total_steps": 15940, "loss": 0.6453, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 8.910036093578223e-06, "epoch": 2.15, "percentage": 21.52, "elapsed_time": "1:01:26", "remaining_time": "3:44:04"}
+{"current_steps": 3440, "total_steps": 15940, "loss": 0.6256, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 8.90387880326684e-06, "epoch": 2.16, "percentage": 21.58, "elapsed_time": "1:01:34", "remaining_time": "3:43:45"}
+{"current_steps": 3450, "total_steps": 15940, "loss": 0.6244, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 8.897706310637766e-06, "epoch": 2.16, "percentage": 21.64, "elapsed_time": "1:01:41", "remaining_time": "3:43:18"}
+{"current_steps": 3460, "total_steps": 15940, "loss": 0.5975, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 8.89151863972765e-06, "epoch": 2.17, "percentage": 21.71, "elapsed_time": "1:01:47", "remaining_time": "3:42:52"}
+{"current_steps": 3470, "total_steps": 15940, "loss": 0.6777, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 8.88531581463226e-06, "epoch": 2.18, "percentage": 21.77, "elapsed_time": "1:01:57", "remaining_time": "3:42:37"}
+{"current_steps": 3480, "total_steps": 15940, "loss": 0.6139, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 8.879097859506371e-06, "epoch": 2.18, "percentage": 21.83, "elapsed_time": "1:02:03", "remaining_time": "3:42:12"}
+{"current_steps": 3490, "total_steps": 15940, "loss": 0.5775, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 8.872864798563676e-06, "epoch": 2.19, "percentage": 21.89, "elapsed_time": "1:02:10", "remaining_time": "3:41:47"}
+{"current_steps": 3500, "total_steps": 15940, "loss": 0.624, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 8.866616656076696e-06, "epoch": 2.2, "percentage": 21.96, "elapsed_time": "1:02:17", "remaining_time": "3:41:24"}
+{"current_steps": 3510, "total_steps": 15940, "loss": 0.6647, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 8.860353456376679e-06, "epoch": 2.2, "percentage": 22.02, "elapsed_time": "1:02:23", "remaining_time": "3:40:58"}
+{"current_steps": 3520, "total_steps": 15940, "loss": 0.7081, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 8.854075223853509e-06, "epoch": 2.21, "percentage": 22.08, "elapsed_time": "1:02:31", "remaining_time": "3:40:35"}
+{"current_steps": 3530, "total_steps": 15940, "loss": 0.6974, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 8.847781982955613e-06, "epoch": 2.21, "percentage": 22.15, "elapsed_time": "1:02:38", "remaining_time": "3:40:14"}
+{"current_steps": 3540, "total_steps": 15940, "loss": 0.6585, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 8.841473758189853e-06, "epoch": 2.22, "percentage": 22.21, "elapsed_time": "1:02:46", "remaining_time": "3:39:51"}
+{"current_steps": 3550, "total_steps": 15940, "loss": 0.6116, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 8.835150574121455e-06, "epoch": 2.23, "percentage": 22.27, "elapsed_time": "1:02:54", "remaining_time": "3:39:33"}
+{"current_steps": 3560, "total_steps": 15940, "loss": 0.6333, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 8.828812455373891e-06, "epoch": 2.23, "percentage": 22.33, "elapsed_time": "1:03:02", "remaining_time": "3:39:13"}
+{"current_steps": 3570, "total_steps": 15940, "loss": 0.6015, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 8.82245942662879e-06, "epoch": 2.24, "percentage": 22.4, "elapsed_time": "1:03:09", "remaining_time": "3:38:51"}
+{"current_steps": 3580, "total_steps": 15940, "loss": 0.6719, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 8.816091512625845e-06, "epoch": 2.25, "percentage": 22.46, "elapsed_time": "1:03:18", "remaining_time": "3:38:32"}
+{"current_steps": 3590, "total_steps": 15940, "loss": 0.6562, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 8.80970873816271e-06, "epoch": 2.25, "percentage": 22.52, "elapsed_time": "1:03:24", "remaining_time": "3:38:08"}
+{"current_steps": 3600, "total_steps": 15940, "loss": 0.6436, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 8.803311128094918e-06, "epoch": 2.26, "percentage": 22.58, "elapsed_time": "1:03:31", "remaining_time": "3:37:46"}
+{"current_steps": 3600, "total_steps": 15940, "loss": null, "eval_loss": 0.6980520486831665, "predict_loss": null, "reward": null, "learning_rate": null, "epoch": 2.26, "percentage": 22.58, "elapsed_time": "1:03:31", "remaining_time": "3:37:46"}
+{"current_steps": 3610, "total_steps": 15940, "loss": 0.6022, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 8.796898707335766e-06, "epoch": 2.26, "percentage": 22.65, "elapsed_time": "1:04:43", "remaining_time": "3:41:04"}
+{"current_steps": 3620, "total_steps": 15940, "loss": 0.6124, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 8.790471500856229e-06, "epoch": 2.27, "percentage": 22.71, "elapsed_time": "1:04:49", "remaining_time": "3:40:38"}
+{"current_steps": 3630, "total_steps": 15940, "loss": 0.6209, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 8.784029533684857e-06, "epoch": 2.28, "percentage": 22.77, "elapsed_time": "1:04:57", "remaining_time": "3:40:16"}
+{"current_steps": 3640, "total_steps": 15940, "loss": 0.6179, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 8.777572830907685e-06, "epoch": 2.28, "percentage": 22.84, "elapsed_time": "1:05:05", "remaining_time": "3:39:57"}
+{"current_steps": 3650, "total_steps": 15940, "loss": 0.6815, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 8.771101417668127e-06, "epoch": 2.29, "percentage": 22.9, "elapsed_time": "1:05:14", "remaining_time": "3:39:39"}
+{"current_steps": 3660, "total_steps": 15940, "loss": 0.5767, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 8.764615319166885e-06, "epoch": 2.3, "percentage": 22.96, "elapsed_time": "1:05:20", "remaining_time": "3:39:14"}
+{"current_steps": 3670, "total_steps": 15940, "loss": 0.6281, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 8.758114560661846e-06, "epoch": 2.3, "percentage": 23.02, "elapsed_time": "1:05:27", "remaining_time": "3:38:49"}
+{"current_steps": 3680, "total_steps": 15940, "loss": 0.6368, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 8.751599167467985e-06, "epoch": 2.31, "percentage": 23.09, "elapsed_time": "1:05:34", "remaining_time": "3:38:28"}
+{"current_steps": 3690, "total_steps": 15940, "loss": 0.6503, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 8.745069164957265e-06, "epoch": 2.31, "percentage": 23.15, "elapsed_time": "1:05:42", "remaining_time": "3:38:09"}
+{"current_steps": 3700, "total_steps": 15940, "loss": 0.6503, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 8.738524578558547e-06, "epoch": 2.32, "percentage": 23.21, "elapsed_time": "1:05:50", "remaining_time": "3:37:50"}
+{"current_steps": 3710, "total_steps": 15940, "loss": 0.6412, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 8.731965433757474e-06, "epoch": 2.33, "percentage": 23.27, "elapsed_time": "1:05:59", "remaining_time": "3:37:31"}
+{"current_steps": 3720, "total_steps": 15940, "loss": 0.6283, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 8.72539175609639e-06, "epoch": 2.33, "percentage": 23.34, "elapsed_time": "1:06:06", "remaining_time": "3:37:10"}
+{"current_steps": 3730, "total_steps": 15940, "loss": 0.6316, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 8.718803571174229e-06, "epoch": 2.34, "percentage": 23.4, "elapsed_time": "1:06:13", "remaining_time": "3:36:46"}
+{"current_steps": 3740, "total_steps": 15940, "loss": 0.6337, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 8.712200904646417e-06, "epoch": 2.35, "percentage": 23.46, "elapsed_time": "1:06:23", "remaining_time": "3:36:32"}
+{"current_steps": 3750, "total_steps": 15940, "loss": 0.6683, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 8.705583782224776e-06, "epoch": 2.35, "percentage": 23.53, "elapsed_time": "1:06:29", "remaining_time": "3:36:09"}
+{"current_steps": 3760, "total_steps": 15940, "loss": 0.6538, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 8.698952229677422e-06, "epoch": 2.36, "percentage": 23.59, "elapsed_time": "1:06:36", "remaining_time": "3:35:44"}
+{"current_steps": 3770, "total_steps": 15940, "loss": 0.6179, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 8.692306272828661e-06, "epoch": 2.37, "percentage": 23.65, "elapsed_time": "1:06:44", "remaining_time": "3:35:25"}
+{"current_steps": 3780, "total_steps": 15940, "loss": 0.6436, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 8.685645937558896e-06, "epoch": 2.37, "percentage": 23.71, "elapsed_time": "1:06:50", "remaining_time": "3:35:01"}
+{"current_steps": 3790, "total_steps": 15940, "loss": 0.6242, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 8.678971249804517e-06, "epoch": 2.38, "percentage": 23.78, "elapsed_time": "1:06:58", "remaining_time": "3:34:41"}
+{"current_steps": 3800, "total_steps": 15940, "loss": 0.6444, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 8.67228223555781e-06, "epoch": 2.38, "percentage": 23.84, "elapsed_time": "1:07:04", "remaining_time": "3:34:18"}
+{"current_steps": 3800, "total_steps": 15940, "loss": null, "eval_loss": 0.6968220472335815, "predict_loss": null, "reward": null, "learning_rate": null, "epoch": 2.38, "percentage": 23.84, "elapsed_time": "1:07:04", "remaining_time": "3:34:18"}
+{"current_steps": 3810, "total_steps": 15940, "loss": 0.6562, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 8.665578920866844e-06, "epoch": 2.39, "percentage": 23.9, "elapsed_time": "1:08:17", "remaining_time": "3:37:25"}
+{"current_steps": 3820, "total_steps": 15940, "loss": 0.6776, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 8.658861331835384e-06, "epoch": 2.4, "percentage": 23.96, "elapsed_time": "1:08:24", "remaining_time": "3:37:02"}
+{"current_steps": 3830, "total_steps": 15940, "loss": 0.6245, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 8.652129494622776e-06, "epoch": 2.4, "percentage": 24.03, "elapsed_time": "1:08:33", "remaining_time": "3:36:47"}
+{"current_steps": 3840, "total_steps": 15940, "loss": 0.6692, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 8.645383435443853e-06, "epoch": 2.41, "percentage": 24.09, "elapsed_time": "1:08:40", "remaining_time": "3:36:24"}
+{"current_steps": 3850, "total_steps": 15940, "loss": 0.6314, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 8.638623180568829e-06, "epoch": 2.42, "percentage": 24.15, "elapsed_time": "1:08:47", "remaining_time": "3:36:00"}
+{"current_steps": 3860, "total_steps": 15940, "loss": 0.6289, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 8.631848756323198e-06, "epoch": 2.42, "percentage": 24.22, "elapsed_time": "1:08:53", "remaining_time": "3:35:36"}
+{"current_steps": 3870, "total_steps": 15940, "loss": 0.6367, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 8.625060189087636e-06, "epoch": 2.43, "percentage": 24.28, "elapsed_time": "1:08:59", "remaining_time": "3:35:11"}
+{"current_steps": 3880, "total_steps": 15940, "loss": 0.6262, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 8.618257505297887e-06, "epoch": 2.43, "percentage": 24.34, "elapsed_time": "1:09:07", "remaining_time": "3:34:51"}
+{"current_steps": 3890, "total_steps": 15940, "loss": 0.6598, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 8.611440731444673e-06, "epoch": 2.44, "percentage": 24.4, "elapsed_time": "1:09:14", "remaining_time": "3:34:28"}
+{"current_steps": 3900, "total_steps": 15940, "loss": 0.6465, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 8.604609894073583e-06, "epoch": 2.45, "percentage": 24.47, "elapsed_time": "1:09:21", "remaining_time": "3:34:07"}
+{"current_steps": 3910, "total_steps": 15940, "loss": 0.665, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 8.597765019784972e-06, "epoch": 2.45, "percentage": 24.53, "elapsed_time": "1:09:30", "remaining_time": "3:33:52"}
+{"current_steps": 3920, "total_steps": 15940, "loss": 0.6207, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 8.590906135233854e-06, "epoch": 2.46, "percentage": 24.59, "elapsed_time": "1:09:38", "remaining_time": "3:33:33"}
+{"current_steps": 3930, "total_steps": 15940, "loss": 0.6626, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 8.584033267129807e-06, "epoch": 2.47, "percentage": 24.65, "elapsed_time": "1:09:46", "remaining_time": "3:33:14"}
+{"current_steps": 3940, "total_steps": 15940, "loss": 0.6141, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 8.577146442236856e-06, "epoch": 2.47, "percentage": 24.72, "elapsed_time": "1:09:54", "remaining_time": "3:32:54"}
+{"current_steps": 3950, "total_steps": 15940, "loss": 0.6651, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 8.570245687373384e-06, "epoch": 2.48, "percentage": 24.78, "elapsed_time": "1:10:01", "remaining_time": "3:32:33"}
+{"current_steps": 3960, "total_steps": 15940, "loss": 0.6725, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 8.563331029412013e-06, "epoch": 2.48, "percentage": 24.84, "elapsed_time": "1:10:11", "remaining_time": "3:32:19"}
+{"current_steps": 3970, "total_steps": 15940, "loss": 0.6066, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 8.556402495279506e-06, "epoch": 2.49, "percentage": 24.91, "elapsed_time": "1:10:18", "remaining_time": "3:31:59"}
+{"current_steps": 3980, "total_steps": 15940, "loss": 0.6752, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 8.549460111956665e-06, "epoch": 2.5, "percentage": 24.97, "elapsed_time": "1:10:26", "remaining_time": "3:31:40"}
+{"current_steps": 3990, "total_steps": 15940, "loss": 0.6554, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 8.542503906478224e-06, "epoch": 2.5, "percentage": 25.03, "elapsed_time": "1:10:32", "remaining_time": "3:31:17"}
+{"current_steps": 4000, "total_steps": 15940, "loss": 0.6368, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 8.535533905932739e-06, "epoch": 2.51, "percentage": 25.09, "elapsed_time": "1:10:41", "remaining_time": "3:30:59"}
+{"current_steps": 4000, "total_steps": 15940, "loss": null, "eval_loss": 0.6986888647079468, "predict_loss": null, "reward": null, "learning_rate": null, "epoch": 2.51, "percentage": 25.09, "elapsed_time": "1:10:41", "remaining_time": "3:30:59"}
+{"current_steps": 4000, "total_steps": 15940, "loss": null, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": null, "epoch": 2.51, "percentage": 25.09, "elapsed_time": "1:10:41", "remaining_time": "3:30:59"}
+{"current_steps": 282, "total_steps": 282, "loss": null, "eval_loss": 0.6943792104721069, "predict_loss": null, "reward": null, "learning_rate": null, "epoch": 2.51, "percentage": 100.0, "elapsed_time": "1:12:59", "remaining_time": "0:00:00"}
diff --git a/llama2_13b_peft/news_commentary_de/trainer_state.json b/llama2_13b_peft/news_commentary_de/trainer_state.json
new file mode 100644
index 0000000000000000000000000000000000000000..84ae3ec860dd98076061bd5bffd033d1cfdcb1db
--- /dev/null
+++ b/llama2_13b_peft/news_commentary_de/trainer_state.json
@@ -0,0 +1,2990 @@
+{
+ "best_metric": 0.6943792104721069,
+ "best_model_checkpoint": "ckpt/llama2_13b_fuze30_no_sys/news_commentary_de_no_sys/checkpoint-3000",
+ "epoch": 2.509410288582183,
+ "eval_steps": 200,
+ "global_step": 4000,
+ "is_hyper_param_search": false,
+ "is_local_process_zero": true,
+ "is_world_process_zero": true,
+ "log_history": [
+ {
+ "epoch": 0.01,
+ "grad_norm": 0.5409977436065674,
+ "learning_rate": 5e-06,
+ "loss": 1.3994,
+ "step": 10
+ },
+ {
+ "epoch": 0.01,
+ "grad_norm": 0.850004255771637,
+ "learning_rate": 1e-05,
+ "loss": 1.4561,
+ "step": 20
+ },
+ {
+ "epoch": 0.02,
+ "grad_norm": 0.8501812219619751,
+ "learning_rate": 9.999990264607035e-06,
+ "loss": 1.3697,
+ "step": 30
+ },
+ {
+ "epoch": 0.03,
+ "grad_norm": 0.6338475346565247,
+ "learning_rate": 9.999961058466052e-06,
+ "loss": 1.3627,
+ "step": 40
+ },
+ {
+ "epoch": 0.03,
+ "grad_norm": 0.7430967688560486,
+ "learning_rate": 9.999912381690781e-06,
+ "loss": 1.1155,
+ "step": 50
+ },
+ {
+ "epoch": 0.04,
+ "grad_norm": 0.5487976670265198,
+ "learning_rate": 9.999844234470782e-06,
+ "loss": 0.9492,
+ "step": 60
+ },
+ {
+ "epoch": 0.04,
+ "grad_norm": 0.3653506934642792,
+ "learning_rate": 9.999756617071427e-06,
+ "loss": 0.9067,
+ "step": 70
+ },
+ {
+ "epoch": 0.05,
+ "grad_norm": 0.38920339941978455,
+ "learning_rate": 9.999649529833915e-06,
+ "loss": 0.8848,
+ "step": 80
+ },
+ {
+ "epoch": 0.06,
+ "grad_norm": 0.4155251979827881,
+ "learning_rate": 9.999522973175257e-06,
+ "loss": 0.798,
+ "step": 90
+ },
+ {
+ "epoch": 0.06,
+ "grad_norm": 0.4156494438648224,
+ "learning_rate": 9.999376947588288e-06,
+ "loss": 0.8782,
+ "step": 100
+ },
+ {
+ "epoch": 0.07,
+ "grad_norm": 0.4306489825248718,
+ "learning_rate": 9.99921145364165e-06,
+ "loss": 0.8124,
+ "step": 110
+ },
+ {
+ "epoch": 0.08,
+ "grad_norm": 0.39355072379112244,
+ "learning_rate": 9.999026491979809e-06,
+ "loss": 0.838,
+ "step": 120
+ },
+ {
+ "epoch": 0.08,
+ "grad_norm": 0.4246688783168793,
+ "learning_rate": 9.99882206332303e-06,
+ "loss": 0.8383,
+ "step": 130
+ },
+ {
+ "epoch": 0.09,
+ "grad_norm": 0.47585156559944153,
+ "learning_rate": 9.99859816846739e-06,
+ "loss": 0.8705,
+ "step": 140
+ },
+ {
+ "epoch": 0.09,
+ "grad_norm": 0.48569419980049133,
+ "learning_rate": 9.998354808284774e-06,
+ "loss": 0.7872,
+ "step": 150
+ },
+ {
+ "epoch": 0.1,
+ "grad_norm": 0.5107733011245728,
+ "learning_rate": 9.998091983722862e-06,
+ "loss": 0.789,
+ "step": 160
+ },
+ {
+ "epoch": 0.11,
+ "grad_norm": 0.5669977068901062,
+ "learning_rate": 9.997809695805136e-06,
+ "loss": 0.7749,
+ "step": 170
+ },
+ {
+ "epoch": 0.11,
+ "grad_norm": 0.49600809812545776,
+ "learning_rate": 9.99750794563087e-06,
+ "loss": 0.7935,
+ "step": 180
+ },
+ {
+ "epoch": 0.12,
+ "grad_norm": 0.45251163840293884,
+ "learning_rate": 9.997186734375124e-06,
+ "loss": 0.7817,
+ "step": 190
+ },
+ {
+ "epoch": 0.13,
+ "grad_norm": 0.46742165088653564,
+ "learning_rate": 9.996846063288746e-06,
+ "loss": 0.7429,
+ "step": 200
+ },
+ {
+ "epoch": 0.13,
+ "eval_loss": 0.7712445855140686,
+ "eval_runtime": 64.6782,
+ "eval_samples_per_second": 69.575,
+ "eval_steps_per_second": 4.36,
+ "step": 200
+ },
+ {
+ "epoch": 0.13,
+ "grad_norm": 0.5643576383590698,
+ "learning_rate": 9.996485933698364e-06,
+ "loss": 0.7636,
+ "step": 210
+ },
+ {
+ "epoch": 0.14,
+ "grad_norm": 0.4915783405303955,
+ "learning_rate": 9.996106347006378e-06,
+ "loss": 0.7856,
+ "step": 220
+ },
+ {
+ "epoch": 0.14,
+ "grad_norm": 0.3926757574081421,
+ "learning_rate": 9.99570730469096e-06,
+ "loss": 0.7529,
+ "step": 230
+ },
+ {
+ "epoch": 0.15,
+ "grad_norm": 0.3297576606273651,
+ "learning_rate": 9.995288808306041e-06,
+ "loss": 0.7671,
+ "step": 240
+ },
+ {
+ "epoch": 0.16,
+ "grad_norm": 0.45379459857940674,
+ "learning_rate": 9.994850859481312e-06,
+ "loss": 0.7231,
+ "step": 250
+ },
+ {
+ "epoch": 0.16,
+ "grad_norm": 0.5688673853874207,
+ "learning_rate": 9.994393459922219e-06,
+ "loss": 0.7694,
+ "step": 260
+ },
+ {
+ "epoch": 0.17,
+ "grad_norm": 0.6590914130210876,
+ "learning_rate": 9.993916611409941e-06,
+ "loss": 0.7661,
+ "step": 270
+ },
+ {
+ "epoch": 0.18,
+ "grad_norm": 0.4207383692264557,
+ "learning_rate": 9.993420315801406e-06,
+ "loss": 0.7952,
+ "step": 280
+ },
+ {
+ "epoch": 0.18,
+ "grad_norm": 0.47460174560546875,
+ "learning_rate": 9.992904575029265e-06,
+ "loss": 0.7966,
+ "step": 290
+ },
+ {
+ "epoch": 0.19,
+ "grad_norm": 0.6118924617767334,
+ "learning_rate": 9.992369391101895e-06,
+ "loss": 0.8167,
+ "step": 300
+ },
+ {
+ "epoch": 0.19,
+ "grad_norm": 0.44934767484664917,
+ "learning_rate": 9.991814766103386e-06,
+ "loss": 0.7368,
+ "step": 310
+ },
+ {
+ "epoch": 0.2,
+ "grad_norm": 0.5106733441352844,
+ "learning_rate": 9.991240702193532e-06,
+ "loss": 0.7796,
+ "step": 320
+ },
+ {
+ "epoch": 0.21,
+ "grad_norm": 0.4405980706214905,
+ "learning_rate": 9.99064720160783e-06,
+ "loss": 0.7727,
+ "step": 330
+ },
+ {
+ "epoch": 0.21,
+ "grad_norm": 0.6010485887527466,
+ "learning_rate": 9.990034266657468e-06,
+ "loss": 0.7604,
+ "step": 340
+ },
+ {
+ "epoch": 0.22,
+ "grad_norm": 0.6098916530609131,
+ "learning_rate": 9.989401899729307e-06,
+ "loss": 0.7399,
+ "step": 350
+ },
+ {
+ "epoch": 0.23,
+ "grad_norm": 0.5837363004684448,
+ "learning_rate": 9.988750103285883e-06,
+ "loss": 0.7715,
+ "step": 360
+ },
+ {
+ "epoch": 0.23,
+ "grad_norm": 0.49089643359184265,
+ "learning_rate": 9.988078879865396e-06,
+ "loss": 0.738,
+ "step": 370
+ },
+ {
+ "epoch": 0.24,
+ "grad_norm": 0.508166491985321,
+ "learning_rate": 9.987388232081694e-06,
+ "loss": 0.8025,
+ "step": 380
+ },
+ {
+ "epoch": 0.24,
+ "grad_norm": 0.6415013074874878,
+ "learning_rate": 9.98667816262427e-06,
+ "loss": 0.7561,
+ "step": 390
+ },
+ {
+ "epoch": 0.25,
+ "grad_norm": 0.5850837230682373,
+ "learning_rate": 9.985948674258243e-06,
+ "loss": 0.7549,
+ "step": 400
+ },
+ {
+ "epoch": 0.25,
+ "eval_loss": 0.743410587310791,
+ "eval_runtime": 64.8376,
+ "eval_samples_per_second": 69.404,
+ "eval_steps_per_second": 4.349,
+ "step": 400
+ },
+ {
+ "epoch": 0.26,
+ "grad_norm": 0.627358615398407,
+ "learning_rate": 9.985199769824359e-06,
+ "loss": 0.7694,
+ "step": 410
+ },
+ {
+ "epoch": 0.26,
+ "grad_norm": 0.7586867213249207,
+ "learning_rate": 9.984431452238968e-06,
+ "loss": 0.7353,
+ "step": 420
+ },
+ {
+ "epoch": 0.27,
+ "grad_norm": 0.5713008642196655,
+ "learning_rate": 9.983643724494017e-06,
+ "loss": 0.7299,
+ "step": 430
+ },
+ {
+ "epoch": 0.28,
+ "grad_norm": 0.5664968490600586,
+ "learning_rate": 9.982836589657043e-06,
+ "loss": 0.754,
+ "step": 440
+ },
+ {
+ "epoch": 0.28,
+ "grad_norm": 0.4575900435447693,
+ "learning_rate": 9.98201005087116e-06,
+ "loss": 0.7355,
+ "step": 450
+ },
+ {
+ "epoch": 0.29,
+ "grad_norm": 0.6498897075653076,
+ "learning_rate": 9.981164111355036e-06,
+ "loss": 0.7543,
+ "step": 460
+ },
+ {
+ "epoch": 0.29,
+ "grad_norm": 0.6509144306182861,
+ "learning_rate": 9.98029877440289e-06,
+ "loss": 0.7568,
+ "step": 470
+ },
+ {
+ "epoch": 0.3,
+ "grad_norm": 0.44653260707855225,
+ "learning_rate": 9.979414043384485e-06,
+ "loss": 0.7313,
+ "step": 480
+ },
+ {
+ "epoch": 0.31,
+ "grad_norm": 0.7275229096412659,
+ "learning_rate": 9.978509921745101e-06,
+ "loss": 0.7456,
+ "step": 490
+ },
+ {
+ "epoch": 0.31,
+ "grad_norm": 0.4918762147426605,
+ "learning_rate": 9.97758641300553e-06,
+ "loss": 0.7585,
+ "step": 500
+ },
+ {
+ "epoch": 0.32,
+ "grad_norm": 0.5181304216384888,
+ "learning_rate": 9.97664352076206e-06,
+ "loss": 0.7311,
+ "step": 510
+ },
+ {
+ "epoch": 0.33,
+ "grad_norm": 0.5354281663894653,
+ "learning_rate": 9.97568124868646e-06,
+ "loss": 0.7173,
+ "step": 520
+ },
+ {
+ "epoch": 0.33,
+ "grad_norm": 0.47694316506385803,
+ "learning_rate": 9.974699600525972e-06,
+ "loss": 0.7408,
+ "step": 530
+ },
+ {
+ "epoch": 0.34,
+ "grad_norm": 0.5888867974281311,
+ "learning_rate": 9.973698580103286e-06,
+ "loss": 0.757,
+ "step": 540
+ },
+ {
+ "epoch": 0.35,
+ "grad_norm": 0.7656754851341248,
+ "learning_rate": 9.972678191316533e-06,
+ "loss": 0.7717,
+ "step": 550
+ },
+ {
+ "epoch": 0.35,
+ "grad_norm": 0.5808092355728149,
+ "learning_rate": 9.971638438139266e-06,
+ "loss": 0.7314,
+ "step": 560
+ },
+ {
+ "epoch": 0.36,
+ "grad_norm": 0.5002965331077576,
+ "learning_rate": 9.97057932462045e-06,
+ "loss": 0.7112,
+ "step": 570
+ },
+ {
+ "epoch": 0.36,
+ "grad_norm": 0.6044530272483826,
+ "learning_rate": 9.96950085488444e-06,
+ "loss": 0.7802,
+ "step": 580
+ },
+ {
+ "epoch": 0.37,
+ "grad_norm": 0.48741769790649414,
+ "learning_rate": 9.968403033130963e-06,
+ "loss": 0.7472,
+ "step": 590
+ },
+ {
+ "epoch": 0.38,
+ "grad_norm": 0.4956966042518616,
+ "learning_rate": 9.967285863635112e-06,
+ "loss": 0.7552,
+ "step": 600
+ },
+ {
+ "epoch": 0.38,
+ "eval_loss": 0.733000636100769,
+ "eval_runtime": 65.6052,
+ "eval_samples_per_second": 68.592,
+ "eval_steps_per_second": 4.298,
+ "step": 600
+ },
+ {
+ "epoch": 0.38,
+ "grad_norm": 0.528469979763031,
+ "learning_rate": 9.966149350747321e-06,
+ "loss": 0.7274,
+ "step": 610
+ },
+ {
+ "epoch": 0.39,
+ "grad_norm": 0.5717535614967346,
+ "learning_rate": 9.964993498893349e-06,
+ "loss": 0.7734,
+ "step": 620
+ },
+ {
+ "epoch": 0.4,
+ "grad_norm": 0.5049377083778381,
+ "learning_rate": 9.963818312574265e-06,
+ "loss": 0.7117,
+ "step": 630
+ },
+ {
+ "epoch": 0.4,
+ "grad_norm": 0.7002434134483337,
+ "learning_rate": 9.962623796366428e-06,
+ "loss": 0.7256,
+ "step": 640
+ },
+ {
+ "epoch": 0.41,
+ "grad_norm": 0.6600221991539001,
+ "learning_rate": 9.961409954921472e-06,
+ "loss": 0.764,
+ "step": 650
+ },
+ {
+ "epoch": 0.41,
+ "grad_norm": 0.5288920402526855,
+ "learning_rate": 9.96017679296629e-06,
+ "loss": 0.7385,
+ "step": 660
+ },
+ {
+ "epoch": 0.42,
+ "grad_norm": 0.6407844424247742,
+ "learning_rate": 9.958924315303005e-06,
+ "loss": 0.7386,
+ "step": 670
+ },
+ {
+ "epoch": 0.43,
+ "grad_norm": 0.6425316333770752,
+ "learning_rate": 9.95765252680896e-06,
+ "loss": 0.7013,
+ "step": 680
+ },
+ {
+ "epoch": 0.43,
+ "grad_norm": 0.6219075918197632,
+ "learning_rate": 9.956361432436705e-06,
+ "loss": 0.7104,
+ "step": 690
+ },
+ {
+ "epoch": 0.44,
+ "grad_norm": 0.5872789621353149,
+ "learning_rate": 9.95505103721396e-06,
+ "loss": 0.6988,
+ "step": 700
+ },
+ {
+ "epoch": 0.45,
+ "grad_norm": 0.8937903642654419,
+ "learning_rate": 9.953721346243613e-06,
+ "loss": 0.7177,
+ "step": 710
+ },
+ {
+ "epoch": 0.45,
+ "grad_norm": 0.5471718311309814,
+ "learning_rate": 9.952372364703688e-06,
+ "loss": 0.6804,
+ "step": 720
+ },
+ {
+ "epoch": 0.46,
+ "grad_norm": 0.7264242172241211,
+ "learning_rate": 9.95100409784733e-06,
+ "loss": 0.7432,
+ "step": 730
+ },
+ {
+ "epoch": 0.46,
+ "grad_norm": 0.7826697826385498,
+ "learning_rate": 9.949616551002787e-06,
+ "loss": 0.7521,
+ "step": 740
+ },
+ {
+ "epoch": 0.47,
+ "grad_norm": 0.6297461986541748,
+ "learning_rate": 9.948209729573384e-06,
+ "loss": 0.7624,
+ "step": 750
+ },
+ {
+ "epoch": 0.48,
+ "grad_norm": 0.7424671053886414,
+ "learning_rate": 9.946783639037503e-06,
+ "loss": 0.7619,
+ "step": 760
+ },
+ {
+ "epoch": 0.48,
+ "grad_norm": 0.5803889632225037,
+ "learning_rate": 9.945338284948568e-06,
+ "loss": 0.7159,
+ "step": 770
+ },
+ {
+ "epoch": 0.49,
+ "grad_norm": 0.895115077495575,
+ "learning_rate": 9.943873672935014e-06,
+ "loss": 0.7621,
+ "step": 780
+ },
+ {
+ "epoch": 0.5,
+ "grad_norm": 0.5325012803077698,
+ "learning_rate": 9.94238980870027e-06,
+ "loss": 0.6923,
+ "step": 790
+ },
+ {
+ "epoch": 0.5,
+ "grad_norm": 0.7378474473953247,
+ "learning_rate": 9.940886698022733e-06,
+ "loss": 0.7265,
+ "step": 800
+ },
+ {
+ "epoch": 0.5,
+ "eval_loss": 0.7256230711936951,
+ "eval_runtime": 70.6462,
+ "eval_samples_per_second": 63.698,
+ "eval_steps_per_second": 3.992,
+ "step": 800
+ },
+ {
+ "epoch": 0.51,
+ "grad_norm": 0.6454309225082397,
+ "learning_rate": 9.93936434675576e-06,
+ "loss": 0.6976,
+ "step": 810
+ },
+ {
+ "epoch": 0.51,
+ "grad_norm": 0.7399590015411377,
+ "learning_rate": 9.93782276082762e-06,
+ "loss": 0.7028,
+ "step": 820
+ },
+ {
+ "epoch": 0.52,
+ "grad_norm": 0.6661127209663391,
+ "learning_rate": 9.936261946241492e-06,
+ "loss": 0.7253,
+ "step": 830
+ },
+ {
+ "epoch": 0.53,
+ "grad_norm": 0.5480040907859802,
+ "learning_rate": 9.934681909075434e-06,
+ "loss": 0.7096,
+ "step": 840
+ },
+ {
+ "epoch": 0.53,
+ "grad_norm": 0.6889688968658447,
+ "learning_rate": 9.93308265548236e-06,
+ "loss": 0.745,
+ "step": 850
+ },
+ {
+ "epoch": 0.54,
+ "grad_norm": 0.6629202961921692,
+ "learning_rate": 9.931464191690015e-06,
+ "loss": 0.7111,
+ "step": 860
+ },
+ {
+ "epoch": 0.55,
+ "grad_norm": 0.5166647434234619,
+ "learning_rate": 9.929826524000948e-06,
+ "loss": 0.7296,
+ "step": 870
+ },
+ {
+ "epoch": 0.55,
+ "grad_norm": 0.6730151772499084,
+ "learning_rate": 9.928169658792498e-06,
+ "loss": 0.7387,
+ "step": 880
+ },
+ {
+ "epoch": 0.56,
+ "grad_norm": 0.6847391724586487,
+ "learning_rate": 9.926493602516758e-06,
+ "loss": 0.7156,
+ "step": 890
+ },
+ {
+ "epoch": 0.56,
+ "grad_norm": 0.7915560007095337,
+ "learning_rate": 9.924798361700554e-06,
+ "loss": 0.7956,
+ "step": 900
+ },
+ {
+ "epoch": 0.57,
+ "grad_norm": 0.5927907824516296,
+ "learning_rate": 9.923083942945419e-06,
+ "loss": 0.7361,
+ "step": 910
+ },
+ {
+ "epoch": 0.58,
+ "grad_norm": 0.7477264404296875,
+ "learning_rate": 9.92135035292757e-06,
+ "loss": 0.7091,
+ "step": 920
+ },
+ {
+ "epoch": 0.58,
+ "grad_norm": 0.7492902278900146,
+ "learning_rate": 9.919597598397882e-06,
+ "loss": 0.6967,
+ "step": 930
+ },
+ {
+ "epoch": 0.59,
+ "grad_norm": 0.7357175350189209,
+ "learning_rate": 9.91782568618185e-06,
+ "loss": 0.7509,
+ "step": 940
+ },
+ {
+ "epoch": 0.6,
+ "grad_norm": 0.6629440188407898,
+ "learning_rate": 9.916034623179584e-06,
+ "loss": 0.6999,
+ "step": 950
+ },
+ {
+ "epoch": 0.6,
+ "grad_norm": 0.5954321026802063,
+ "learning_rate": 9.914224416365765e-06,
+ "loss": 0.7194,
+ "step": 960
+ },
+ {
+ "epoch": 0.61,
+ "grad_norm": 0.9139691591262817,
+ "learning_rate": 9.91239507278962e-06,
+ "loss": 0.705,
+ "step": 970
+ },
+ {
+ "epoch": 0.61,
+ "grad_norm": 0.9337642788887024,
+ "learning_rate": 9.910546599574903e-06,
+ "loss": 0.7314,
+ "step": 980
+ },
+ {
+ "epoch": 0.62,
+ "grad_norm": 0.6616548299789429,
+ "learning_rate": 9.908679003919856e-06,
+ "loss": 0.7549,
+ "step": 990
+ },
+ {
+ "epoch": 0.63,
+ "grad_norm": 0.6958469152450562,
+ "learning_rate": 9.906792293097194e-06,
+ "loss": 0.7524,
+ "step": 1000
+ },
+ {
+ "epoch": 0.63,
+ "eval_loss": 0.7200015187263489,
+ "eval_runtime": 68.2079,
+ "eval_samples_per_second": 65.975,
+ "eval_steps_per_second": 4.134,
+ "step": 1000
+ },
+ {
+ "epoch": 0.63,
+ "grad_norm": 0.5352278351783752,
+ "learning_rate": 9.904886474454063e-06,
+ "loss": 0.7218,
+ "step": 1010
+ },
+ {
+ "epoch": 0.64,
+ "grad_norm": 0.6772333979606628,
+ "learning_rate": 9.90296155541202e-06,
+ "loss": 0.7171,
+ "step": 1020
+ },
+ {
+ "epoch": 0.65,
+ "grad_norm": 0.7102545499801636,
+ "learning_rate": 9.901017543467005e-06,
+ "loss": 0.758,
+ "step": 1030
+ },
+ {
+ "epoch": 0.65,
+ "grad_norm": 0.5379916429519653,
+ "learning_rate": 9.899054446189305e-06,
+ "loss": 0.7121,
+ "step": 1040
+ },
+ {
+ "epoch": 0.66,
+ "grad_norm": 0.6267510056495667,
+ "learning_rate": 9.897072271223526e-06,
+ "loss": 0.7088,
+ "step": 1050
+ },
+ {
+ "epoch": 0.66,
+ "grad_norm": 0.9392660856246948,
+ "learning_rate": 9.895071026288574e-06,
+ "loss": 0.7804,
+ "step": 1060
+ },
+ {
+ "epoch": 0.67,
+ "grad_norm": 0.9270221590995789,
+ "learning_rate": 9.893050719177608e-06,
+ "loss": 0.6935,
+ "step": 1070
+ },
+ {
+ "epoch": 0.68,
+ "grad_norm": 0.722115159034729,
+ "learning_rate": 9.891011357758022e-06,
+ "loss": 0.6894,
+ "step": 1080
+ },
+ {
+ "epoch": 0.68,
+ "grad_norm": 0.7055147886276245,
+ "learning_rate": 9.888952949971411e-06,
+ "loss": 0.7244,
+ "step": 1090
+ },
+ {
+ "epoch": 0.69,
+ "grad_norm": 0.7774051427841187,
+ "learning_rate": 9.886875503833537e-06,
+ "loss": 0.8156,
+ "step": 1100
+ },
+ {
+ "epoch": 0.7,
+ "grad_norm": 0.7965037226676941,
+ "learning_rate": 9.884779027434304e-06,
+ "loss": 0.7478,
+ "step": 1110
+ },
+ {
+ "epoch": 0.7,
+ "grad_norm": 0.8204682469367981,
+ "learning_rate": 9.882663528937716e-06,
+ "loss": 0.7187,
+ "step": 1120
+ },
+ {
+ "epoch": 0.71,
+ "grad_norm": 0.63904869556427,
+ "learning_rate": 9.880529016581863e-06,
+ "loss": 0.7145,
+ "step": 1130
+ },
+ {
+ "epoch": 0.72,
+ "grad_norm": 0.6523028612136841,
+ "learning_rate": 9.878375498678869e-06,
+ "loss": 0.731,
+ "step": 1140
+ },
+ {
+ "epoch": 0.72,
+ "grad_norm": 0.7148768901824951,
+ "learning_rate": 9.876202983614868e-06,
+ "loss": 0.7323,
+ "step": 1150
+ },
+ {
+ "epoch": 0.73,
+ "grad_norm": 0.6108402013778687,
+ "learning_rate": 9.874011479849981e-06,
+ "loss": 0.6757,
+ "step": 1160
+ },
+ {
+ "epoch": 0.73,
+ "grad_norm": 0.6056957244873047,
+ "learning_rate": 9.871800995918264e-06,
+ "loss": 0.7258,
+ "step": 1170
+ },
+ {
+ "epoch": 0.74,
+ "grad_norm": 0.7671077847480774,
+ "learning_rate": 9.86957154042769e-06,
+ "loss": 0.7334,
+ "step": 1180
+ },
+ {
+ "epoch": 0.75,
+ "grad_norm": 0.8327913880348206,
+ "learning_rate": 9.867323122060108e-06,
+ "loss": 0.7358,
+ "step": 1190
+ },
+ {
+ "epoch": 0.75,
+ "grad_norm": 0.7025701999664307,
+ "learning_rate": 9.865055749571215e-06,
+ "loss": 0.6976,
+ "step": 1200
+ },
+ {
+ "epoch": 0.75,
+ "eval_loss": 0.7151169180870056,
+ "eval_runtime": 64.9708,
+ "eval_samples_per_second": 69.262,
+ "eval_steps_per_second": 4.34,
+ "step": 1200
+ },
+ {
+ "epoch": 0.76,
+ "grad_norm": 0.8391425609588623,
+ "learning_rate": 9.862769431790513e-06,
+ "loss": 0.6983,
+ "step": 1210
+ },
+ {
+ "epoch": 0.77,
+ "grad_norm": 0.7243052124977112,
+ "learning_rate": 9.860464177621286e-06,
+ "loss": 0.7171,
+ "step": 1220
+ },
+ {
+ "epoch": 0.77,
+ "grad_norm": 0.6501705050468445,
+ "learning_rate": 9.858139996040554e-06,
+ "loss": 0.7206,
+ "step": 1230
+ },
+ {
+ "epoch": 0.78,
+ "grad_norm": 0.8618900179862976,
+ "learning_rate": 9.855796896099044e-06,
+ "loss": 0.7368,
+ "step": 1240
+ },
+ {
+ "epoch": 0.78,
+ "grad_norm": 0.826347291469574,
+ "learning_rate": 9.85343488692116e-06,
+ "loss": 0.7372,
+ "step": 1250
+ },
+ {
+ "epoch": 0.79,
+ "grad_norm": 0.8590556383132935,
+ "learning_rate": 9.851053977704931e-06,
+ "loss": 0.7373,
+ "step": 1260
+ },
+ {
+ "epoch": 0.8,
+ "grad_norm": 0.8719233274459839,
+ "learning_rate": 9.848654177721999e-06,
+ "loss": 0.7608,
+ "step": 1270
+ },
+ {
+ "epoch": 0.8,
+ "grad_norm": 0.7729814052581787,
+ "learning_rate": 9.846235496317556e-06,
+ "loss": 0.7227,
+ "step": 1280
+ },
+ {
+ "epoch": 0.81,
+ "grad_norm": 0.801908016204834,
+ "learning_rate": 9.843797942910328e-06,
+ "loss": 0.7415,
+ "step": 1290
+ },
+ {
+ "epoch": 0.82,
+ "grad_norm": 0.9884589910507202,
+ "learning_rate": 9.841341526992536e-06,
+ "loss": 0.7206,
+ "step": 1300
+ },
+ {
+ "epoch": 0.82,
+ "grad_norm": 0.7067356705665588,
+ "learning_rate": 9.838866258129847e-06,
+ "loss": 0.6704,
+ "step": 1310
+ },
+ {
+ "epoch": 0.83,
+ "grad_norm": 0.7258339524269104,
+ "learning_rate": 9.836372145961346e-06,
+ "loss": 0.7159,
+ "step": 1320
+ },
+ {
+ "epoch": 0.83,
+ "grad_norm": 0.8512592315673828,
+ "learning_rate": 9.833859200199498e-06,
+ "loss": 0.6916,
+ "step": 1330
+ },
+ {
+ "epoch": 0.84,
+ "grad_norm": 0.7856159210205078,
+ "learning_rate": 9.83132743063011e-06,
+ "loss": 0.7568,
+ "step": 1340
+ },
+ {
+ "epoch": 0.85,
+ "grad_norm": 0.7149519324302673,
+ "learning_rate": 9.82877684711229e-06,
+ "loss": 0.7017,
+ "step": 1350
+ },
+ {
+ "epoch": 0.85,
+ "grad_norm": 1.0214589834213257,
+ "learning_rate": 9.826207459578412e-06,
+ "loss": 0.7127,
+ "step": 1360
+ },
+ {
+ "epoch": 0.86,
+ "grad_norm": 1.0295792818069458,
+ "learning_rate": 9.823619278034073e-06,
+ "loss": 0.7013,
+ "step": 1370
+ },
+ {
+ "epoch": 0.87,
+ "grad_norm": 0.8674212694168091,
+ "learning_rate": 9.821012312558059e-06,
+ "loss": 0.6942,
+ "step": 1380
+ },
+ {
+ "epoch": 0.87,
+ "grad_norm": 0.7604880332946777,
+ "learning_rate": 9.818386573302305e-06,
+ "loss": 0.7013,
+ "step": 1390
+ },
+ {
+ "epoch": 0.88,
+ "grad_norm": 0.7863268852233887,
+ "learning_rate": 9.815742070491852e-06,
+ "loss": 0.7408,
+ "step": 1400
+ },
+ {
+ "epoch": 0.88,
+ "eval_loss": 0.7116020917892456,
+ "eval_runtime": 64.7496,
+ "eval_samples_per_second": 69.498,
+ "eval_steps_per_second": 4.355,
+ "step": 1400
+ },
+ {
+ "epoch": 0.88,
+ "grad_norm": 0.7451047301292419,
+ "learning_rate": 9.81307881442481e-06,
+ "loss": 0.7105,
+ "step": 1410
+ },
+ {
+ "epoch": 0.89,
+ "grad_norm": 0.8191768527030945,
+ "learning_rate": 9.810396815472316e-06,
+ "loss": 0.6994,
+ "step": 1420
+ },
+ {
+ "epoch": 0.9,
+ "grad_norm": 0.5049307942390442,
+ "learning_rate": 9.807696084078494e-06,
+ "loss": 0.7459,
+ "step": 1430
+ },
+ {
+ "epoch": 0.9,
+ "grad_norm": 0.762649416923523,
+ "learning_rate": 9.804976630760419e-06,
+ "loss": 0.7048,
+ "step": 1440
+ },
+ {
+ "epoch": 0.91,
+ "grad_norm": 0.8065420985221863,
+ "learning_rate": 9.802238466108068e-06,
+ "loss": 0.6975,
+ "step": 1450
+ },
+ {
+ "epoch": 0.92,
+ "grad_norm": 0.899728000164032,
+ "learning_rate": 9.799481600784286e-06,
+ "loss": 0.737,
+ "step": 1460
+ },
+ {
+ "epoch": 0.92,
+ "grad_norm": 0.7029632925987244,
+ "learning_rate": 9.796706045524738e-06,
+ "loss": 0.7236,
+ "step": 1470
+ },
+ {
+ "epoch": 0.93,
+ "grad_norm": 0.7470441460609436,
+ "learning_rate": 9.793911811137874e-06,
+ "loss": 0.6984,
+ "step": 1480
+ },
+ {
+ "epoch": 0.93,
+ "grad_norm": 0.8542289137840271,
+ "learning_rate": 9.791098908504884e-06,
+ "loss": 0.8019,
+ "step": 1490
+ },
+ {
+ "epoch": 0.94,
+ "grad_norm": 0.749045193195343,
+ "learning_rate": 9.788267348579649e-06,
+ "loss": 0.7114,
+ "step": 1500
+ },
+ {
+ "epoch": 0.95,
+ "grad_norm": 0.7834633588790894,
+ "learning_rate": 9.78541714238871e-06,
+ "loss": 0.7222,
+ "step": 1510
+ },
+ {
+ "epoch": 0.95,
+ "grad_norm": 0.8488750457763672,
+ "learning_rate": 9.782548301031218e-06,
+ "loss": 0.7434,
+ "step": 1520
+ },
+ {
+ "epoch": 0.96,
+ "grad_norm": 0.7018651962280273,
+ "learning_rate": 9.77966083567889e-06,
+ "loss": 0.7193,
+ "step": 1530
+ },
+ {
+ "epoch": 0.97,
+ "grad_norm": 0.8260754346847534,
+ "learning_rate": 9.776754757575975e-06,
+ "loss": 0.7763,
+ "step": 1540
+ },
+ {
+ "epoch": 0.97,
+ "grad_norm": 0.8732118010520935,
+ "learning_rate": 9.773830078039193e-06,
+ "loss": 0.7494,
+ "step": 1550
+ },
+ {
+ "epoch": 0.98,
+ "grad_norm": 0.9026480317115784,
+ "learning_rate": 9.77088680845771e-06,
+ "loss": 0.7078,
+ "step": 1560
+ },
+ {
+ "epoch": 0.98,
+ "grad_norm": 0.7559505105018616,
+ "learning_rate": 9.767924960293076e-06,
+ "loss": 0.7468,
+ "step": 1570
+ },
+ {
+ "epoch": 0.99,
+ "grad_norm": 0.8832489848136902,
+ "learning_rate": 9.764944545079197e-06,
+ "loss": 0.7502,
+ "step": 1580
+ },
+ {
+ "epoch": 1.0,
+ "grad_norm": 0.8065813183784485,
+ "learning_rate": 9.761945574422276e-06,
+ "loss": 0.7337,
+ "step": 1590
+ },
+ {
+ "epoch": 1.0,
+ "grad_norm": 0.6966451406478882,
+ "learning_rate": 9.758928060000779e-06,
+ "loss": 0.701,
+ "step": 1600
+ },
+ {
+ "epoch": 1.0,
+ "eval_loss": 0.7084596157073975,
+ "eval_runtime": 64.9175,
+ "eval_samples_per_second": 69.319,
+ "eval_steps_per_second": 4.344,
+ "step": 1600
+ },
+ {
+ "epoch": 1.01,
+ "grad_norm": 0.8769924640655518,
+ "learning_rate": 9.755892013565377e-06,
+ "loss": 0.7014,
+ "step": 1610
+ },
+ {
+ "epoch": 1.02,
+ "grad_norm": 0.8940397500991821,
+ "learning_rate": 9.752837446938915e-06,
+ "loss": 0.7256,
+ "step": 1620
+ },
+ {
+ "epoch": 1.02,
+ "grad_norm": 0.7818279266357422,
+ "learning_rate": 9.749764372016355e-06,
+ "loss": 0.7268,
+ "step": 1630
+ },
+ {
+ "epoch": 1.03,
+ "grad_norm": 0.7369450330734253,
+ "learning_rate": 9.746672800764734e-06,
+ "loss": 0.6968,
+ "step": 1640
+ },
+ {
+ "epoch": 1.04,
+ "grad_norm": 0.8924703001976013,
+ "learning_rate": 9.743562745223118e-06,
+ "loss": 0.7087,
+ "step": 1650
+ },
+ {
+ "epoch": 1.04,
+ "grad_norm": 1.0398907661437988,
+ "learning_rate": 9.740434217502549e-06,
+ "loss": 0.7199,
+ "step": 1660
+ },
+ {
+ "epoch": 1.05,
+ "grad_norm": 0.7427188754081726,
+ "learning_rate": 9.737287229786007e-06,
+ "loss": 0.687,
+ "step": 1670
+ },
+ {
+ "epoch": 1.05,
+ "grad_norm": 0.9230946898460388,
+ "learning_rate": 9.734121794328358e-06,
+ "loss": 0.7003,
+ "step": 1680
+ },
+ {
+ "epoch": 1.06,
+ "grad_norm": 0.8461260795593262,
+ "learning_rate": 9.730937923456303e-06,
+ "loss": 0.7329,
+ "step": 1690
+ },
+ {
+ "epoch": 1.07,
+ "grad_norm": 0.783156156539917,
+ "learning_rate": 9.727735629568335e-06,
+ "loss": 0.6924,
+ "step": 1700
+ },
+ {
+ "epoch": 1.07,
+ "grad_norm": 0.8659111261367798,
+ "learning_rate": 9.724514925134696e-06,
+ "loss": 0.7219,
+ "step": 1710
+ },
+ {
+ "epoch": 1.08,
+ "grad_norm": 0.8218225240707397,
+ "learning_rate": 9.721275822697307e-06,
+ "loss": 0.6741,
+ "step": 1720
+ },
+ {
+ "epoch": 1.09,
+ "grad_norm": 0.8807560205459595,
+ "learning_rate": 9.718018334869748e-06,
+ "loss": 0.7047,
+ "step": 1730
+ },
+ {
+ "epoch": 1.09,
+ "grad_norm": 0.9925751090049744,
+ "learning_rate": 9.714742474337187e-06,
+ "loss": 0.7156,
+ "step": 1740
+ },
+ {
+ "epoch": 1.1,
+ "grad_norm": 0.7675251364707947,
+ "learning_rate": 9.711448253856336e-06,
+ "loss": 0.6887,
+ "step": 1750
+ },
+ {
+ "epoch": 1.1,
+ "grad_norm": 0.783015251159668,
+ "learning_rate": 9.708135686255415e-06,
+ "loss": 0.7373,
+ "step": 1760
+ },
+ {
+ "epoch": 1.11,
+ "grad_norm": 0.8704028129577637,
+ "learning_rate": 9.704804784434077e-06,
+ "loss": 0.6652,
+ "step": 1770
+ },
+ {
+ "epoch": 1.12,
+ "grad_norm": 0.9532449245452881,
+ "learning_rate": 9.701455561363378e-06,
+ "loss": 0.682,
+ "step": 1780
+ },
+ {
+ "epoch": 1.12,
+ "grad_norm": 0.9703534245491028,
+ "learning_rate": 9.698088030085721e-06,
+ "loss": 0.6844,
+ "step": 1790
+ },
+ {
+ "epoch": 1.13,
+ "grad_norm": 1.031153678894043,
+ "learning_rate": 9.694702203714801e-06,
+ "loss": 0.7084,
+ "step": 1800
+ },
+ {
+ "epoch": 1.13,
+ "eval_loss": 0.705936074256897,
+ "eval_runtime": 64.9167,
+ "eval_samples_per_second": 69.32,
+ "eval_steps_per_second": 4.344,
+ "step": 1800
+ },
+ {
+ "epoch": 1.14,
+ "grad_norm": 0.8839524388313293,
+ "learning_rate": 9.691298095435559e-06,
+ "loss": 0.6897,
+ "step": 1810
+ },
+ {
+ "epoch": 1.14,
+ "grad_norm": 1.0173550844192505,
+ "learning_rate": 9.687875718504126e-06,
+ "loss": 0.6851,
+ "step": 1820
+ },
+ {
+ "epoch": 1.15,
+ "grad_norm": 1.0902131795883179,
+ "learning_rate": 9.684435086247777e-06,
+ "loss": 0.7132,
+ "step": 1830
+ },
+ {
+ "epoch": 1.15,
+ "grad_norm": 0.8699798583984375,
+ "learning_rate": 9.680976212064875e-06,
+ "loss": 0.7129,
+ "step": 1840
+ },
+ {
+ "epoch": 1.16,
+ "grad_norm": 0.879970133304596,
+ "learning_rate": 9.677499109424818e-06,
+ "loss": 0.6907,
+ "step": 1850
+ },
+ {
+ "epoch": 1.17,
+ "grad_norm": 0.9659926295280457,
+ "learning_rate": 9.674003791867993e-06,
+ "loss": 0.7327,
+ "step": 1860
+ },
+ {
+ "epoch": 1.17,
+ "grad_norm": 1.0900288820266724,
+ "learning_rate": 9.670490273005713e-06,
+ "loss": 0.7304,
+ "step": 1870
+ },
+ {
+ "epoch": 1.18,
+ "grad_norm": 0.995785117149353,
+ "learning_rate": 9.666958566520175e-06,
+ "loss": 0.7076,
+ "step": 1880
+ },
+ {
+ "epoch": 1.19,
+ "grad_norm": 1.0170907974243164,
+ "learning_rate": 9.663408686164399e-06,
+ "loss": 0.691,
+ "step": 1890
+ },
+ {
+ "epoch": 1.19,
+ "grad_norm": 1.1418849229812622,
+ "learning_rate": 9.659840645762176e-06,
+ "loss": 0.74,
+ "step": 1900
+ },
+ {
+ "epoch": 1.2,
+ "grad_norm": 0.7200061082839966,
+ "learning_rate": 9.656254459208015e-06,
+ "loss": 0.7295,
+ "step": 1910
+ },
+ {
+ "epoch": 1.2,
+ "grad_norm": 0.9135183095932007,
+ "learning_rate": 9.652650140467094e-06,
+ "loss": 0.651,
+ "step": 1920
+ },
+ {
+ "epoch": 1.21,
+ "grad_norm": 0.9724289774894714,
+ "learning_rate": 9.649027703575193e-06,
+ "loss": 0.7028,
+ "step": 1930
+ },
+ {
+ "epoch": 1.22,
+ "grad_norm": 0.8180338740348816,
+ "learning_rate": 9.645387162638652e-06,
+ "loss": 0.7179,
+ "step": 1940
+ },
+ {
+ "epoch": 1.22,
+ "grad_norm": 1.089158296585083,
+ "learning_rate": 9.641728531834313e-06,
+ "loss": 0.6872,
+ "step": 1950
+ },
+ {
+ "epoch": 1.23,
+ "grad_norm": 1.0048317909240723,
+ "learning_rate": 9.638051825409454e-06,
+ "loss": 0.6991,
+ "step": 1960
+ },
+ {
+ "epoch": 1.24,
+ "grad_norm": 1.1580454111099243,
+ "learning_rate": 9.634357057681749e-06,
+ "loss": 0.7183,
+ "step": 1970
+ },
+ {
+ "epoch": 1.24,
+ "grad_norm": 1.0045746564865112,
+ "learning_rate": 9.630644243039207e-06,
+ "loss": 0.6795,
+ "step": 1980
+ },
+ {
+ "epoch": 1.25,
+ "grad_norm": 0.9629393815994263,
+ "learning_rate": 9.62691339594011e-06,
+ "loss": 0.7075,
+ "step": 1990
+ },
+ {
+ "epoch": 1.25,
+ "grad_norm": 0.946081280708313,
+ "learning_rate": 9.623164530912963e-06,
+ "loss": 0.6999,
+ "step": 2000
+ },
+ {
+ "epoch": 1.25,
+ "eval_loss": 0.7040402293205261,
+ "eval_runtime": 64.9289,
+ "eval_samples_per_second": 69.307,
+ "eval_steps_per_second": 4.343,
+ "step": 2000
+ },
+ {
+ "epoch": 1.26,
+ "grad_norm": 1.0208806991577148,
+ "learning_rate": 9.619397662556434e-06,
+ "loss": 0.6947,
+ "step": 2010
+ },
+ {
+ "epoch": 1.27,
+ "grad_norm": 1.3248392343521118,
+ "learning_rate": 9.615612805539305e-06,
+ "loss": 0.7102,
+ "step": 2020
+ },
+ {
+ "epoch": 1.27,
+ "grad_norm": 0.9521629810333252,
+ "learning_rate": 9.6118099746004e-06,
+ "loss": 0.7068,
+ "step": 2030
+ },
+ {
+ "epoch": 1.28,
+ "grad_norm": 1.129441738128662,
+ "learning_rate": 9.607989184548544e-06,
+ "loss": 0.6528,
+ "step": 2040
+ },
+ {
+ "epoch": 1.29,
+ "grad_norm": 1.2303441762924194,
+ "learning_rate": 9.604150450262488e-06,
+ "loss": 0.6838,
+ "step": 2050
+ },
+ {
+ "epoch": 1.29,
+ "grad_norm": 1.433111310005188,
+ "learning_rate": 9.600293786690873e-06,
+ "loss": 0.6908,
+ "step": 2060
+ },
+ {
+ "epoch": 1.3,
+ "grad_norm": 1.11778724193573,
+ "learning_rate": 9.596419208852152e-06,
+ "loss": 0.7153,
+ "step": 2070
+ },
+ {
+ "epoch": 1.3,
+ "grad_norm": 1.3464716672897339,
+ "learning_rate": 9.592526731834536e-06,
+ "loss": 0.67,
+ "step": 2080
+ },
+ {
+ "epoch": 1.31,
+ "grad_norm": 1.0811423063278198,
+ "learning_rate": 9.588616370795947e-06,
+ "loss": 0.705,
+ "step": 2090
+ },
+ {
+ "epoch": 1.32,
+ "grad_norm": 1.2497215270996094,
+ "learning_rate": 9.584688140963945e-06,
+ "loss": 0.7037,
+ "step": 2100
+ },
+ {
+ "epoch": 1.32,
+ "grad_norm": 1.0369244813919067,
+ "learning_rate": 9.580742057635672e-06,
+ "loss": 0.7199,
+ "step": 2110
+ },
+ {
+ "epoch": 1.33,
+ "grad_norm": 0.8846107125282288,
+ "learning_rate": 9.576778136177798e-06,
+ "loss": 0.7098,
+ "step": 2120
+ },
+ {
+ "epoch": 1.34,
+ "grad_norm": 1.2424838542938232,
+ "learning_rate": 9.572796392026455e-06,
+ "loss": 0.7109,
+ "step": 2130
+ },
+ {
+ "epoch": 1.34,
+ "grad_norm": 1.1349953413009644,
+ "learning_rate": 9.568796840687184e-06,
+ "loss": 0.693,
+ "step": 2140
+ },
+ {
+ "epoch": 1.35,
+ "grad_norm": 0.9105272889137268,
+ "learning_rate": 9.564779497734864e-06,
+ "loss": 0.6679,
+ "step": 2150
+ },
+ {
+ "epoch": 1.36,
+ "grad_norm": 1.021628737449646,
+ "learning_rate": 9.56074437881366e-06,
+ "loss": 0.6573,
+ "step": 2160
+ },
+ {
+ "epoch": 1.36,
+ "grad_norm": 1.1030464172363281,
+ "learning_rate": 9.55669149963696e-06,
+ "loss": 0.705,
+ "step": 2170
+ },
+ {
+ "epoch": 1.37,
+ "grad_norm": 1.1582733392715454,
+ "learning_rate": 9.552620875987312e-06,
+ "loss": 0.6932,
+ "step": 2180
+ },
+ {
+ "epoch": 1.37,
+ "grad_norm": 1.2710620164871216,
+ "learning_rate": 9.548532523716366e-06,
+ "loss": 0.6616,
+ "step": 2190
+ },
+ {
+ "epoch": 1.38,
+ "grad_norm": 1.1528280973434448,
+ "learning_rate": 9.544426458744805e-06,
+ "loss": 0.7182,
+ "step": 2200
+ },
+ {
+ "epoch": 1.38,
+ "eval_loss": 0.7022137641906738,
+ "eval_runtime": 64.965,
+ "eval_samples_per_second": 69.268,
+ "eval_steps_per_second": 4.341,
+ "step": 2200
+ },
+ {
+ "epoch": 1.39,
+ "grad_norm": 1.1455330848693848,
+ "learning_rate": 9.540302697062294e-06,
+ "loss": 0.6878,
+ "step": 2210
+ },
+ {
+ "epoch": 1.39,
+ "grad_norm": 1.4521374702453613,
+ "learning_rate": 9.536161254727407e-06,
+ "loss": 0.6979,
+ "step": 2220
+ },
+ {
+ "epoch": 1.4,
+ "grad_norm": 1.4062340259552002,
+ "learning_rate": 9.532002147867575e-06,
+ "loss": 0.6749,
+ "step": 2230
+ },
+ {
+ "epoch": 1.41,
+ "grad_norm": 1.0267623662948608,
+ "learning_rate": 9.527825392679012e-06,
+ "loss": 0.6987,
+ "step": 2240
+ },
+ {
+ "epoch": 1.41,
+ "grad_norm": 1.0981144905090332,
+ "learning_rate": 9.523631005426658e-06,
+ "loss": 0.6888,
+ "step": 2250
+ },
+ {
+ "epoch": 1.42,
+ "grad_norm": 1.0353021621704102,
+ "learning_rate": 9.51941900244412e-06,
+ "loss": 0.6471,
+ "step": 2260
+ },
+ {
+ "epoch": 1.42,
+ "grad_norm": 1.1088558435440063,
+ "learning_rate": 9.515189400133594e-06,
+ "loss": 0.6689,
+ "step": 2270
+ },
+ {
+ "epoch": 1.43,
+ "grad_norm": 1.1822565793991089,
+ "learning_rate": 9.510942214965819e-06,
+ "loss": 0.7001,
+ "step": 2280
+ },
+ {
+ "epoch": 1.44,
+ "grad_norm": 1.2247307300567627,
+ "learning_rate": 9.506677463480003e-06,
+ "loss": 0.6999,
+ "step": 2290
+ },
+ {
+ "epoch": 1.44,
+ "grad_norm": 1.163528323173523,
+ "learning_rate": 9.50239516228376e-06,
+ "loss": 0.7008,
+ "step": 2300
+ },
+ {
+ "epoch": 1.45,
+ "grad_norm": 1.2677900791168213,
+ "learning_rate": 9.49809532805304e-06,
+ "loss": 0.7122,
+ "step": 2310
+ },
+ {
+ "epoch": 1.46,
+ "grad_norm": 1.1475526094436646,
+ "learning_rate": 9.493777977532072e-06,
+ "loss": 0.7106,
+ "step": 2320
+ },
+ {
+ "epoch": 1.46,
+ "grad_norm": 1.1459851264953613,
+ "learning_rate": 9.489443127533304e-06,
+ "loss": 0.6739,
+ "step": 2330
+ },
+ {
+ "epoch": 1.47,
+ "grad_norm": 1.2973495721817017,
+ "learning_rate": 9.485090794937319e-06,
+ "loss": 0.6888,
+ "step": 2340
+ },
+ {
+ "epoch": 1.47,
+ "grad_norm": 1.0322624444961548,
+ "learning_rate": 9.480720996692783e-06,
+ "loss": 0.6986,
+ "step": 2350
+ },
+ {
+ "epoch": 1.48,
+ "grad_norm": 1.407605767250061,
+ "learning_rate": 9.476333749816382e-06,
+ "loss": 0.7314,
+ "step": 2360
+ },
+ {
+ "epoch": 1.49,
+ "grad_norm": 1.1082048416137695,
+ "learning_rate": 9.47192907139274e-06,
+ "loss": 0.6602,
+ "step": 2370
+ },
+ {
+ "epoch": 1.49,
+ "grad_norm": 1.02568519115448,
+ "learning_rate": 9.46750697857437e-06,
+ "loss": 0.6454,
+ "step": 2380
+ },
+ {
+ "epoch": 1.5,
+ "grad_norm": 1.12267005443573,
+ "learning_rate": 9.463067488581598e-06,
+ "loss": 0.6499,
+ "step": 2390
+ },
+ {
+ "epoch": 1.51,
+ "grad_norm": 1.0023943185806274,
+ "learning_rate": 9.45861061870249e-06,
+ "loss": 0.7267,
+ "step": 2400
+ },
+ {
+ "epoch": 1.51,
+ "eval_loss": 0.6993948817253113,
+ "eval_runtime": 64.9272,
+ "eval_samples_per_second": 69.308,
+ "eval_steps_per_second": 4.343,
+ "step": 2400
+ },
+ {
+ "epoch": 1.51,
+ "grad_norm": 1.2597460746765137,
+ "learning_rate": 9.454136386292804e-06,
+ "loss": 0.6934,
+ "step": 2410
+ },
+ {
+ "epoch": 1.52,
+ "grad_norm": 1.293137788772583,
+ "learning_rate": 9.449644808775902e-06,
+ "loss": 0.7095,
+ "step": 2420
+ },
+ {
+ "epoch": 1.52,
+ "grad_norm": 1.0400352478027344,
+ "learning_rate": 9.445135903642693e-06,
+ "loss": 0.6626,
+ "step": 2430
+ },
+ {
+ "epoch": 1.53,
+ "grad_norm": 1.0873581171035767,
+ "learning_rate": 9.440609688451561e-06,
+ "loss": 0.6513,
+ "step": 2440
+ },
+ {
+ "epoch": 1.54,
+ "grad_norm": 1.0420424938201904,
+ "learning_rate": 9.4360661808283e-06,
+ "loss": 0.711,
+ "step": 2450
+ },
+ {
+ "epoch": 1.54,
+ "grad_norm": 1.3502894639968872,
+ "learning_rate": 9.431505398466045e-06,
+ "loss": 0.6991,
+ "step": 2460
+ },
+ {
+ "epoch": 1.55,
+ "grad_norm": 1.3502726554870605,
+ "learning_rate": 9.426927359125195e-06,
+ "loss": 0.7073,
+ "step": 2470
+ },
+ {
+ "epoch": 1.56,
+ "grad_norm": 1.2768748998641968,
+ "learning_rate": 9.422332080633361e-06,
+ "loss": 0.6557,
+ "step": 2480
+ },
+ {
+ "epoch": 1.56,
+ "grad_norm": 1.1925798654556274,
+ "learning_rate": 9.417719580885275e-06,
+ "loss": 0.6786,
+ "step": 2490
+ },
+ {
+ "epoch": 1.57,
+ "grad_norm": 0.9290177822113037,
+ "learning_rate": 9.413089877842735e-06,
+ "loss": 0.6159,
+ "step": 2500
+ },
+ {
+ "epoch": 1.57,
+ "grad_norm": 1.3553310632705688,
+ "learning_rate": 9.408442989534536e-06,
+ "loss": 0.7341,
+ "step": 2510
+ },
+ {
+ "epoch": 1.58,
+ "grad_norm": 0.9777106642723083,
+ "learning_rate": 9.403778934056392e-06,
+ "loss": 0.6737,
+ "step": 2520
+ },
+ {
+ "epoch": 1.59,
+ "grad_norm": 1.47153902053833,
+ "learning_rate": 9.399097729570865e-06,
+ "loss": 0.6832,
+ "step": 2530
+ },
+ {
+ "epoch": 1.59,
+ "grad_norm": 1.2370259761810303,
+ "learning_rate": 9.394399394307303e-06,
+ "loss": 0.6691,
+ "step": 2540
+ },
+ {
+ "epoch": 1.6,
+ "grad_norm": 1.2009457349777222,
+ "learning_rate": 9.38968394656176e-06,
+ "loss": 0.7072,
+ "step": 2550
+ },
+ {
+ "epoch": 1.61,
+ "grad_norm": 1.095410704612732,
+ "learning_rate": 9.384951404696933e-06,
+ "loss": 0.7068,
+ "step": 2560
+ },
+ {
+ "epoch": 1.61,
+ "grad_norm": 1.0805617570877075,
+ "learning_rate": 9.380201787142085e-06,
+ "loss": 0.6476,
+ "step": 2570
+ },
+ {
+ "epoch": 1.62,
+ "grad_norm": 1.3433113098144531,
+ "learning_rate": 9.37543511239297e-06,
+ "loss": 0.6805,
+ "step": 2580
+ },
+ {
+ "epoch": 1.62,
+ "grad_norm": 1.3151830434799194,
+ "learning_rate": 9.370651399011769e-06,
+ "loss": 0.6887,
+ "step": 2590
+ },
+ {
+ "epoch": 1.63,
+ "grad_norm": 1.3367606401443481,
+ "learning_rate": 9.365850665627016e-06,
+ "loss": 0.6912,
+ "step": 2600
+ },
+ {
+ "epoch": 1.63,
+ "eval_loss": 0.6971801519393921,
+ "eval_runtime": 65.0021,
+ "eval_samples_per_second": 69.229,
+ "eval_steps_per_second": 4.338,
+ "step": 2600
+ },
+ {
+ "epoch": 1.64,
+ "grad_norm": 1.3351305723190308,
+ "learning_rate": 9.36103293093352e-06,
+ "loss": 0.6479,
+ "step": 2610
+ },
+ {
+ "epoch": 1.64,
+ "grad_norm": 1.3986787796020508,
+ "learning_rate": 9.356198213692297e-06,
+ "loss": 0.6788,
+ "step": 2620
+ },
+ {
+ "epoch": 1.65,
+ "grad_norm": 1.0550477504730225,
+ "learning_rate": 9.351346532730499e-06,
+ "loss": 0.6481,
+ "step": 2630
+ },
+ {
+ "epoch": 1.66,
+ "grad_norm": 1.275985836982727,
+ "learning_rate": 9.346477906941331e-06,
+ "loss": 0.6893,
+ "step": 2640
+ },
+ {
+ "epoch": 1.66,
+ "grad_norm": 1.441588044166565,
+ "learning_rate": 9.341592355283986e-06,
+ "loss": 0.6784,
+ "step": 2650
+ },
+ {
+ "epoch": 1.67,
+ "grad_norm": 1.2504793405532837,
+ "learning_rate": 9.336689896783575e-06,
+ "loss": 0.6834,
+ "step": 2660
+ },
+ {
+ "epoch": 1.68,
+ "grad_norm": 1.2592806816101074,
+ "learning_rate": 9.331770550531037e-06,
+ "loss": 0.6701,
+ "step": 2670
+ },
+ {
+ "epoch": 1.68,
+ "grad_norm": 1.494611382484436,
+ "learning_rate": 9.32683433568308e-06,
+ "loss": 0.6691,
+ "step": 2680
+ },
+ {
+ "epoch": 1.69,
+ "grad_norm": 1.2938275337219238,
+ "learning_rate": 9.321881271462104e-06,
+ "loss": 0.6818,
+ "step": 2690
+ },
+ {
+ "epoch": 1.69,
+ "grad_norm": 1.5548397302627563,
+ "learning_rate": 9.316911377156116e-06,
+ "loss": 0.6852,
+ "step": 2700
+ },
+ {
+ "epoch": 1.7,
+ "grad_norm": 1.2488983869552612,
+ "learning_rate": 9.31192467211867e-06,
+ "loss": 0.6653,
+ "step": 2710
+ },
+ {
+ "epoch": 1.71,
+ "grad_norm": 1.3493934869766235,
+ "learning_rate": 9.306921175768776e-06,
+ "loss": 0.6671,
+ "step": 2720
+ },
+ {
+ "epoch": 1.71,
+ "grad_norm": 1.388487696647644,
+ "learning_rate": 9.301900907590836e-06,
+ "loss": 0.7066,
+ "step": 2730
+ },
+ {
+ "epoch": 1.72,
+ "grad_norm": 1.2521592378616333,
+ "learning_rate": 9.296863887134561e-06,
+ "loss": 0.7326,
+ "step": 2740
+ },
+ {
+ "epoch": 1.73,
+ "grad_norm": 1.0157365798950195,
+ "learning_rate": 9.291810134014904e-06,
+ "loss": 0.6758,
+ "step": 2750
+ },
+ {
+ "epoch": 1.73,
+ "grad_norm": 1.0712261199951172,
+ "learning_rate": 9.286739667911973e-06,
+ "loss": 0.6645,
+ "step": 2760
+ },
+ {
+ "epoch": 1.74,
+ "grad_norm": 1.5114517211914062,
+ "learning_rate": 9.281652508570957e-06,
+ "loss": 0.6968,
+ "step": 2770
+ },
+ {
+ "epoch": 1.74,
+ "grad_norm": 1.3408139944076538,
+ "learning_rate": 9.27654867580206e-06,
+ "loss": 0.6718,
+ "step": 2780
+ },
+ {
+ "epoch": 1.75,
+ "grad_norm": 1.3024680614471436,
+ "learning_rate": 9.271428189480405e-06,
+ "loss": 0.6915,
+ "step": 2790
+ },
+ {
+ "epoch": 1.76,
+ "grad_norm": 1.3444178104400635,
+ "learning_rate": 9.266291069545972e-06,
+ "loss": 0.6821,
+ "step": 2800
+ },
+ {
+ "epoch": 1.76,
+ "eval_loss": 0.6953641176223755,
+ "eval_runtime": 65.0162,
+ "eval_samples_per_second": 69.213,
+ "eval_steps_per_second": 4.337,
+ "step": 2800
+ },
+ {
+ "epoch": 1.76,
+ "grad_norm": 1.5429843664169312,
+ "learning_rate": 9.261137336003511e-06,
+ "loss": 0.666,
+ "step": 2810
+ },
+ {
+ "epoch": 1.77,
+ "grad_norm": 1.143649697303772,
+ "learning_rate": 9.255967008922475e-06,
+ "loss": 0.6414,
+ "step": 2820
+ },
+ {
+ "epoch": 1.78,
+ "grad_norm": 1.2989628314971924,
+ "learning_rate": 9.250780108436926e-06,
+ "loss": 0.7321,
+ "step": 2830
+ },
+ {
+ "epoch": 1.78,
+ "grad_norm": 1.4191828966140747,
+ "learning_rate": 9.245576654745471e-06,
+ "loss": 0.735,
+ "step": 2840
+ },
+ {
+ "epoch": 1.79,
+ "grad_norm": 1.5203850269317627,
+ "learning_rate": 9.24035666811118e-06,
+ "loss": 0.6809,
+ "step": 2850
+ },
+ {
+ "epoch": 1.79,
+ "grad_norm": 1.3680098056793213,
+ "learning_rate": 9.235120168861495e-06,
+ "loss": 0.6378,
+ "step": 2860
+ },
+ {
+ "epoch": 1.8,
+ "grad_norm": 1.346763014793396,
+ "learning_rate": 9.229867177388172e-06,
+ "loss": 0.6648,
+ "step": 2870
+ },
+ {
+ "epoch": 1.81,
+ "grad_norm": 1.0098768472671509,
+ "learning_rate": 9.224597714147186e-06,
+ "loss": 0.6681,
+ "step": 2880
+ },
+ {
+ "epoch": 1.81,
+ "grad_norm": 1.3174008131027222,
+ "learning_rate": 9.219311799658652e-06,
+ "loss": 0.6752,
+ "step": 2890
+ },
+ {
+ "epoch": 1.82,
+ "grad_norm": 1.692084789276123,
+ "learning_rate": 9.214009454506754e-06,
+ "loss": 0.6427,
+ "step": 2900
+ },
+ {
+ "epoch": 1.83,
+ "grad_norm": 1.3471505641937256,
+ "learning_rate": 9.208690699339656e-06,
+ "loss": 0.6763,
+ "step": 2910
+ },
+ {
+ "epoch": 1.83,
+ "grad_norm": 1.202491283416748,
+ "learning_rate": 9.203355554869428e-06,
+ "loss": 0.6935,
+ "step": 2920
+ },
+ {
+ "epoch": 1.84,
+ "grad_norm": 1.7211599349975586,
+ "learning_rate": 9.198004041871962e-06,
+ "loss": 0.7012,
+ "step": 2930
+ },
+ {
+ "epoch": 1.84,
+ "grad_norm": 1.336504578590393,
+ "learning_rate": 9.192636181186887e-06,
+ "loss": 0.6713,
+ "step": 2940
+ },
+ {
+ "epoch": 1.85,
+ "grad_norm": 1.2259244918823242,
+ "learning_rate": 9.1872519937175e-06,
+ "loss": 0.6344,
+ "step": 2950
+ },
+ {
+ "epoch": 1.86,
+ "grad_norm": 1.3948123455047607,
+ "learning_rate": 9.181851500430672e-06,
+ "loss": 0.6699,
+ "step": 2960
+ },
+ {
+ "epoch": 1.86,
+ "grad_norm": 1.2859784364700317,
+ "learning_rate": 9.176434722356772e-06,
+ "loss": 0.7029,
+ "step": 2970
+ },
+ {
+ "epoch": 1.87,
+ "grad_norm": 1.1549146175384521,
+ "learning_rate": 9.17100168058959e-06,
+ "loss": 0.6491,
+ "step": 2980
+ },
+ {
+ "epoch": 1.88,
+ "grad_norm": 1.575208306312561,
+ "learning_rate": 9.165552396286236e-06,
+ "loss": 0.6722,
+ "step": 2990
+ },
+ {
+ "epoch": 1.88,
+ "grad_norm": 1.6159918308258057,
+ "learning_rate": 9.160086890667086e-06,
+ "loss": 0.7104,
+ "step": 3000
+ },
+ {
+ "epoch": 1.88,
+ "eval_loss": 0.6943792104721069,
+ "eval_runtime": 65.034,
+ "eval_samples_per_second": 69.195,
+ "eval_steps_per_second": 4.336,
+ "step": 3000
+ },
+ {
+ "epoch": 1.89,
+ "grad_norm": 1.247308611869812,
+ "learning_rate": 9.154605185015678e-06,
+ "loss": 0.7042,
+ "step": 3010
+ },
+ {
+ "epoch": 1.89,
+ "grad_norm": 1.540644884109497,
+ "learning_rate": 9.14910730067863e-06,
+ "loss": 0.6208,
+ "step": 3020
+ },
+ {
+ "epoch": 1.9,
+ "grad_norm": 1.4479825496673584,
+ "learning_rate": 9.143593259065573e-06,
+ "loss": 0.6721,
+ "step": 3030
+ },
+ {
+ "epoch": 1.91,
+ "grad_norm": 1.5486655235290527,
+ "learning_rate": 9.138063081649052e-06,
+ "loss": 0.6328,
+ "step": 3040
+ },
+ {
+ "epoch": 1.91,
+ "grad_norm": 1.0703155994415283,
+ "learning_rate": 9.132516789964443e-06,
+ "loss": 0.6564,
+ "step": 3050
+ },
+ {
+ "epoch": 1.92,
+ "grad_norm": 1.2725510597229004,
+ "learning_rate": 9.126954405609882e-06,
+ "loss": 0.6782,
+ "step": 3060
+ },
+ {
+ "epoch": 1.93,
+ "grad_norm": 1.328399419784546,
+ "learning_rate": 9.121375950246165e-06,
+ "loss": 0.6686,
+ "step": 3070
+ },
+ {
+ "epoch": 1.93,
+ "grad_norm": 1.2014747858047485,
+ "learning_rate": 9.115781445596676e-06,
+ "loss": 0.6445,
+ "step": 3080
+ },
+ {
+ "epoch": 1.94,
+ "grad_norm": 1.3578124046325684,
+ "learning_rate": 9.110170913447294e-06,
+ "loss": 0.6306,
+ "step": 3090
+ },
+ {
+ "epoch": 1.94,
+ "grad_norm": 1.3624286651611328,
+ "learning_rate": 9.104544375646314e-06,
+ "loss": 0.6465,
+ "step": 3100
+ },
+ {
+ "epoch": 1.95,
+ "grad_norm": 1.709974765777588,
+ "learning_rate": 9.098901854104359e-06,
+ "loss": 0.6985,
+ "step": 3110
+ },
+ {
+ "epoch": 1.96,
+ "grad_norm": 1.3302754163742065,
+ "learning_rate": 9.09324337079429e-06,
+ "loss": 0.7272,
+ "step": 3120
+ },
+ {
+ "epoch": 1.96,
+ "grad_norm": 1.2946594953536987,
+ "learning_rate": 9.08756894775114e-06,
+ "loss": 0.6632,
+ "step": 3130
+ },
+ {
+ "epoch": 1.97,
+ "grad_norm": 1.2699226140975952,
+ "learning_rate": 9.081878607071996e-06,
+ "loss": 0.6996,
+ "step": 3140
+ },
+ {
+ "epoch": 1.98,
+ "grad_norm": 1.4561275243759155,
+ "learning_rate": 9.076172370915944e-06,
+ "loss": 0.734,
+ "step": 3150
+ },
+ {
+ "epoch": 1.98,
+ "grad_norm": 1.4393534660339355,
+ "learning_rate": 9.07045026150396e-06,
+ "loss": 0.6578,
+ "step": 3160
+ },
+ {
+ "epoch": 1.99,
+ "grad_norm": 1.4745630025863647,
+ "learning_rate": 9.064712301118842e-06,
+ "loss": 0.6527,
+ "step": 3170
+ },
+ {
+ "epoch": 1.99,
+ "grad_norm": 1.1444178819656372,
+ "learning_rate": 9.058958512105104e-06,
+ "loss": 0.6487,
+ "step": 3180
+ },
+ {
+ "epoch": 2.0,
+ "grad_norm": 1.433406114578247,
+ "learning_rate": 9.053188916868912e-06,
+ "loss": 0.7011,
+ "step": 3190
+ },
+ {
+ "epoch": 2.01,
+ "grad_norm": 1.218345046043396,
+ "learning_rate": 9.04740353787797e-06,
+ "loss": 0.6222,
+ "step": 3200
+ },
+ {
+ "epoch": 2.01,
+ "eval_loss": 0.693417489528656,
+ "eval_runtime": 65.021,
+ "eval_samples_per_second": 69.208,
+ "eval_steps_per_second": 4.337,
+ "step": 3200
+ },
+ {
+ "epoch": 2.01,
+ "grad_norm": 1.5473078489303589,
+ "learning_rate": 9.041602397661459e-06,
+ "loss": 0.6396,
+ "step": 3210
+ },
+ {
+ "epoch": 2.02,
+ "grad_norm": 1.3116644620895386,
+ "learning_rate": 9.035785518809928e-06,
+ "loss": 0.6582,
+ "step": 3220
+ },
+ {
+ "epoch": 2.03,
+ "grad_norm": 1.7744321823120117,
+ "learning_rate": 9.029952923975217e-06,
+ "loss": 0.6517,
+ "step": 3230
+ },
+ {
+ "epoch": 2.03,
+ "grad_norm": 1.5516449213027954,
+ "learning_rate": 9.024104635870368e-06,
+ "loss": 0.6465,
+ "step": 3240
+ },
+ {
+ "epoch": 2.04,
+ "grad_norm": 1.4612600803375244,
+ "learning_rate": 9.018240677269532e-06,
+ "loss": 0.6215,
+ "step": 3250
+ },
+ {
+ "epoch": 2.05,
+ "grad_norm": 1.7983644008636475,
+ "learning_rate": 9.012361071007892e-06,
+ "loss": 0.6609,
+ "step": 3260
+ },
+ {
+ "epoch": 2.05,
+ "grad_norm": 1.6382901668548584,
+ "learning_rate": 9.00646583998155e-06,
+ "loss": 0.6608,
+ "step": 3270
+ },
+ {
+ "epoch": 2.06,
+ "grad_norm": 1.6763097047805786,
+ "learning_rate": 9.000555007147469e-06,
+ "loss": 0.6222,
+ "step": 3280
+ },
+ {
+ "epoch": 2.06,
+ "grad_norm": 1.3221015930175781,
+ "learning_rate": 8.994628595523358e-06,
+ "loss": 0.6363,
+ "step": 3290
+ },
+ {
+ "epoch": 2.07,
+ "grad_norm": 1.5837445259094238,
+ "learning_rate": 8.988686628187597e-06,
+ "loss": 0.6364,
+ "step": 3300
+ },
+ {
+ "epoch": 2.08,
+ "grad_norm": 1.4271923303604126,
+ "learning_rate": 8.98272912827914e-06,
+ "loss": 0.6211,
+ "step": 3310
+ },
+ {
+ "epoch": 2.08,
+ "grad_norm": 1.599827527999878,
+ "learning_rate": 8.97675611899743e-06,
+ "loss": 0.6326,
+ "step": 3320
+ },
+ {
+ "epoch": 2.09,
+ "grad_norm": 1.6661384105682373,
+ "learning_rate": 8.970767623602299e-06,
+ "loss": 0.7006,
+ "step": 3330
+ },
+ {
+ "epoch": 2.1,
+ "grad_norm": 1.9200857877731323,
+ "learning_rate": 8.964763665413894e-06,
+ "loss": 0.6316,
+ "step": 3340
+ },
+ {
+ "epoch": 2.1,
+ "grad_norm": 1.8148436546325684,
+ "learning_rate": 8.95874426781257e-06,
+ "loss": 0.6318,
+ "step": 3350
+ },
+ {
+ "epoch": 2.11,
+ "grad_norm": 1.3358807563781738,
+ "learning_rate": 8.952709454238809e-06,
+ "loss": 0.6067,
+ "step": 3360
+ },
+ {
+ "epoch": 2.11,
+ "grad_norm": 1.8055490255355835,
+ "learning_rate": 8.946659248193122e-06,
+ "loss": 0.6289,
+ "step": 3370
+ },
+ {
+ "epoch": 2.12,
+ "grad_norm": 1.4589310884475708,
+ "learning_rate": 8.940593673235962e-06,
+ "loss": 0.6537,
+ "step": 3380
+ },
+ {
+ "epoch": 2.13,
+ "grad_norm": 1.903086543083191,
+ "learning_rate": 8.934512752987635e-06,
+ "loss": 0.6986,
+ "step": 3390
+ },
+ {
+ "epoch": 2.13,
+ "grad_norm": 1.722476840019226,
+ "learning_rate": 8.928416511128194e-06,
+ "loss": 0.6383,
+ "step": 3400
+ },
+ {
+ "epoch": 2.13,
+ "eval_loss": 0.6974382996559143,
+ "eval_runtime": 65.0527,
+ "eval_samples_per_second": 69.175,
+ "eval_steps_per_second": 4.335,
+ "step": 3400
+ },
+ {
+ "epoch": 2.14,
+ "grad_norm": 1.8206970691680908,
+ "learning_rate": 8.922304971397369e-06,
+ "loss": 0.6447,
+ "step": 3410
+ },
+ {
+ "epoch": 2.15,
+ "grad_norm": 1.690631628036499,
+ "learning_rate": 8.916178157594453e-06,
+ "loss": 0.6441,
+ "step": 3420
+ },
+ {
+ "epoch": 2.15,
+ "grad_norm": 2.108876943588257,
+ "learning_rate": 8.910036093578223e-06,
+ "loss": 0.6453,
+ "step": 3430
+ },
+ {
+ "epoch": 2.16,
+ "grad_norm": 1.6356040239334106,
+ "learning_rate": 8.90387880326684e-06,
+ "loss": 0.6256,
+ "step": 3440
+ },
+ {
+ "epoch": 2.16,
+ "grad_norm": 1.3783752918243408,
+ "learning_rate": 8.897706310637766e-06,
+ "loss": 0.6244,
+ "step": 3450
+ },
+ {
+ "epoch": 2.17,
+ "grad_norm": 1.901208758354187,
+ "learning_rate": 8.89151863972765e-06,
+ "loss": 0.5975,
+ "step": 3460
+ },
+ {
+ "epoch": 2.18,
+ "grad_norm": 1.595009446144104,
+ "learning_rate": 8.88531581463226e-06,
+ "loss": 0.6777,
+ "step": 3470
+ },
+ {
+ "epoch": 2.18,
+ "grad_norm": 1.6791367530822754,
+ "learning_rate": 8.879097859506371e-06,
+ "loss": 0.6139,
+ "step": 3480
+ },
+ {
+ "epoch": 2.19,
+ "grad_norm": 1.5569841861724854,
+ "learning_rate": 8.872864798563676e-06,
+ "loss": 0.5775,
+ "step": 3490
+ },
+ {
+ "epoch": 2.2,
+ "grad_norm": 1.3804527521133423,
+ "learning_rate": 8.866616656076696e-06,
+ "loss": 0.624,
+ "step": 3500
+ },
+ {
+ "epoch": 2.2,
+ "grad_norm": 1.7905445098876953,
+ "learning_rate": 8.860353456376679e-06,
+ "loss": 0.6647,
+ "step": 3510
+ },
+ {
+ "epoch": 2.21,
+ "grad_norm": 1.802614688873291,
+ "learning_rate": 8.854075223853509e-06,
+ "loss": 0.7081,
+ "step": 3520
+ },
+ {
+ "epoch": 2.21,
+ "grad_norm": 1.7135951519012451,
+ "learning_rate": 8.847781982955613e-06,
+ "loss": 0.6974,
+ "step": 3530
+ },
+ {
+ "epoch": 2.22,
+ "grad_norm": 1.8468406200408936,
+ "learning_rate": 8.841473758189853e-06,
+ "loss": 0.6585,
+ "step": 3540
+ },
+ {
+ "epoch": 2.23,
+ "grad_norm": 1.7520302534103394,
+ "learning_rate": 8.835150574121455e-06,
+ "loss": 0.6116,
+ "step": 3550
+ },
+ {
+ "epoch": 2.23,
+ "grad_norm": 1.862479329109192,
+ "learning_rate": 8.828812455373891e-06,
+ "loss": 0.6333,
+ "step": 3560
+ },
+ {
+ "epoch": 2.24,
+ "grad_norm": 1.7767084836959839,
+ "learning_rate": 8.82245942662879e-06,
+ "loss": 0.6015,
+ "step": 3570
+ },
+ {
+ "epoch": 2.25,
+ "grad_norm": 1.6162598133087158,
+ "learning_rate": 8.816091512625845e-06,
+ "loss": 0.6719,
+ "step": 3580
+ },
+ {
+ "epoch": 2.25,
+ "grad_norm": 1.8923571109771729,
+ "learning_rate": 8.80970873816271e-06,
+ "loss": 0.6562,
+ "step": 3590
+ },
+ {
+ "epoch": 2.26,
+ "grad_norm": 1.7792338132858276,
+ "learning_rate": 8.803311128094918e-06,
+ "loss": 0.6436,
+ "step": 3600
+ },
+ {
+ "epoch": 2.26,
+ "eval_loss": 0.6980520486831665,
+ "eval_runtime": 65.0239,
+ "eval_samples_per_second": 69.205,
+ "eval_steps_per_second": 4.337,
+ "step": 3600
+ },
+ {
+ "epoch": 2.26,
+ "grad_norm": 1.819449543952942,
+ "learning_rate": 8.796898707335766e-06,
+ "loss": 0.6022,
+ "step": 3610
+ },
+ {
+ "epoch": 2.27,
+ "grad_norm": 1.923462986946106,
+ "learning_rate": 8.790471500856229e-06,
+ "loss": 0.6124,
+ "step": 3620
+ },
+ {
+ "epoch": 2.28,
+ "grad_norm": 2.0969247817993164,
+ "learning_rate": 8.784029533684857e-06,
+ "loss": 0.6209,
+ "step": 3630
+ },
+ {
+ "epoch": 2.28,
+ "grad_norm": 2.014631509780884,
+ "learning_rate": 8.777572830907685e-06,
+ "loss": 0.6179,
+ "step": 3640
+ },
+ {
+ "epoch": 2.29,
+ "grad_norm": 1.7940195798873901,
+ "learning_rate": 8.771101417668127e-06,
+ "loss": 0.6815,
+ "step": 3650
+ },
+ {
+ "epoch": 2.3,
+ "grad_norm": 1.7244881391525269,
+ "learning_rate": 8.764615319166885e-06,
+ "loss": 0.5767,
+ "step": 3660
+ },
+ {
+ "epoch": 2.3,
+ "grad_norm": 2.157749652862549,
+ "learning_rate": 8.758114560661846e-06,
+ "loss": 0.6281,
+ "step": 3670
+ },
+ {
+ "epoch": 2.31,
+ "grad_norm": 1.818303108215332,
+ "learning_rate": 8.751599167467985e-06,
+ "loss": 0.6368,
+ "step": 3680
+ },
+ {
+ "epoch": 2.31,
+ "grad_norm": 1.8076434135437012,
+ "learning_rate": 8.745069164957265e-06,
+ "loss": 0.6503,
+ "step": 3690
+ },
+ {
+ "epoch": 2.32,
+ "grad_norm": 1.7755082845687866,
+ "learning_rate": 8.738524578558547e-06,
+ "loss": 0.6503,
+ "step": 3700
+ },
+ {
+ "epoch": 2.33,
+ "grad_norm": 2.0641837120056152,
+ "learning_rate": 8.731965433757474e-06,
+ "loss": 0.6412,
+ "step": 3710
+ },
+ {
+ "epoch": 2.33,
+ "grad_norm": 2.174612045288086,
+ "learning_rate": 8.72539175609639e-06,
+ "loss": 0.6283,
+ "step": 3720
+ },
+ {
+ "epoch": 2.34,
+ "grad_norm": 1.905965805053711,
+ "learning_rate": 8.718803571174229e-06,
+ "loss": 0.6316,
+ "step": 3730
+ },
+ {
+ "epoch": 2.35,
+ "grad_norm": 2.290787935256958,
+ "learning_rate": 8.712200904646417e-06,
+ "loss": 0.6337,
+ "step": 3740
+ },
+ {
+ "epoch": 2.35,
+ "grad_norm": 1.7773081064224243,
+ "learning_rate": 8.705583782224776e-06,
+ "loss": 0.6683,
+ "step": 3750
+ },
+ {
+ "epoch": 2.36,
+ "grad_norm": 1.7513020038604736,
+ "learning_rate": 8.698952229677422e-06,
+ "loss": 0.6538,
+ "step": 3760
+ },
+ {
+ "epoch": 2.37,
+ "grad_norm": 1.8641185760498047,
+ "learning_rate": 8.692306272828661e-06,
+ "loss": 0.6179,
+ "step": 3770
+ },
+ {
+ "epoch": 2.37,
+ "grad_norm": 2.4094667434692383,
+ "learning_rate": 8.685645937558896e-06,
+ "loss": 0.6436,
+ "step": 3780
+ },
+ {
+ "epoch": 2.38,
+ "grad_norm": 2.295719623565674,
+ "learning_rate": 8.678971249804517e-06,
+ "loss": 0.6242,
+ "step": 3790
+ },
+ {
+ "epoch": 2.38,
+ "grad_norm": 2.3604509830474854,
+ "learning_rate": 8.67228223555781e-06,
+ "loss": 0.6444,
+ "step": 3800
+ },
+ {
+ "epoch": 2.38,
+ "eval_loss": 0.6968220472335815,
+ "eval_runtime": 65.0257,
+ "eval_samples_per_second": 69.203,
+ "eval_steps_per_second": 4.337,
+ "step": 3800
+ },
+ {
+ "epoch": 2.39,
+ "grad_norm": 1.799545407295227,
+ "learning_rate": 8.665578920866844e-06,
+ "loss": 0.6562,
+ "step": 3810
+ },
+ {
+ "epoch": 2.4,
+ "grad_norm": 1.87678062915802,
+ "learning_rate": 8.658861331835384e-06,
+ "loss": 0.6776,
+ "step": 3820
+ },
+ {
+ "epoch": 2.4,
+ "grad_norm": 1.9466888904571533,
+ "learning_rate": 8.652129494622776e-06,
+ "loss": 0.6245,
+ "step": 3830
+ },
+ {
+ "epoch": 2.41,
+ "grad_norm": 1.9451625347137451,
+ "learning_rate": 8.645383435443853e-06,
+ "loss": 0.6692,
+ "step": 3840
+ },
+ {
+ "epoch": 2.42,
+ "grad_norm": 1.9275856018066406,
+ "learning_rate": 8.638623180568829e-06,
+ "loss": 0.6314,
+ "step": 3850
+ },
+ {
+ "epoch": 2.42,
+ "grad_norm": 1.7316443920135498,
+ "learning_rate": 8.631848756323198e-06,
+ "loss": 0.6289,
+ "step": 3860
+ },
+ {
+ "epoch": 2.43,
+ "grad_norm": 2.5170657634735107,
+ "learning_rate": 8.625060189087636e-06,
+ "loss": 0.6367,
+ "step": 3870
+ },
+ {
+ "epoch": 2.43,
+ "grad_norm": 2.2198870182037354,
+ "learning_rate": 8.618257505297887e-06,
+ "loss": 0.6262,
+ "step": 3880
+ },
+ {
+ "epoch": 2.44,
+ "grad_norm": 1.9207948446273804,
+ "learning_rate": 8.611440731444673e-06,
+ "loss": 0.6598,
+ "step": 3890
+ },
+ {
+ "epoch": 2.45,
+ "grad_norm": 2.0086634159088135,
+ "learning_rate": 8.604609894073583e-06,
+ "loss": 0.6465,
+ "step": 3900
+ },
+ {
+ "epoch": 2.45,
+ "grad_norm": 2.0597639083862305,
+ "learning_rate": 8.597765019784972e-06,
+ "loss": 0.665,
+ "step": 3910
+ },
+ {
+ "epoch": 2.46,
+ "grad_norm": 1.8585723638534546,
+ "learning_rate": 8.590906135233854e-06,
+ "loss": 0.6207,
+ "step": 3920
+ },
+ {
+ "epoch": 2.47,
+ "grad_norm": 2.1862194538116455,
+ "learning_rate": 8.584033267129807e-06,
+ "loss": 0.6626,
+ "step": 3930
+ },
+ {
+ "epoch": 2.47,
+ "grad_norm": 2.048553228378296,
+ "learning_rate": 8.577146442236856e-06,
+ "loss": 0.6141,
+ "step": 3940
+ },
+ {
+ "epoch": 2.48,
+ "grad_norm": 2.2547719478607178,
+ "learning_rate": 8.570245687373384e-06,
+ "loss": 0.6651,
+ "step": 3950
+ },
+ {
+ "epoch": 2.48,
+ "grad_norm": 1.9522244930267334,
+ "learning_rate": 8.563331029412013e-06,
+ "loss": 0.6725,
+ "step": 3960
+ },
+ {
+ "epoch": 2.49,
+ "grad_norm": 1.7376751899719238,
+ "learning_rate": 8.556402495279506e-06,
+ "loss": 0.6066,
+ "step": 3970
+ },
+ {
+ "epoch": 2.5,
+ "grad_norm": 1.900639295578003,
+ "learning_rate": 8.549460111956665e-06,
+ "loss": 0.6752,
+ "step": 3980
+ },
+ {
+ "epoch": 2.5,
+ "grad_norm": 2.1750218868255615,
+ "learning_rate": 8.542503906478224e-06,
+ "loss": 0.6554,
+ "step": 3990
+ },
+ {
+ "epoch": 2.51,
+ "grad_norm": 1.6551765203475952,
+ "learning_rate": 8.535533905932739e-06,
+ "loss": 0.6368,
+ "step": 4000
+ },
+ {
+ "epoch": 2.51,
+ "eval_loss": 0.6986888647079468,
+ "eval_runtime": 65.007,
+ "eval_samples_per_second": 69.223,
+ "eval_steps_per_second": 4.338,
+ "step": 4000
+ },
+ {
+ "epoch": 2.51,
+ "step": 4000,
+ "total_flos": 9.03392259225944e+17,
+ "train_loss": 0.7081527805328369,
+ "train_runtime": 4312.5386,
+ "train_samples_per_second": 59.13,
+ "train_steps_per_second": 3.696
+ }
+ ],
+ "logging_steps": 10,
+ "max_steps": 15940,
+ "num_input_tokens_seen": 0,
+ "num_train_epochs": 10,
+ "save_steps": 1000,
+ "total_flos": 9.03392259225944e+17,
+ "train_batch_size": 8,
+ "trial_name": null,
+ "trial_params": null
+}
diff --git a/llama2_13b_peft/news_commentary_de/training_args.bin b/llama2_13b_peft/news_commentary_de/training_args.bin
new file mode 100644
index 0000000000000000000000000000000000000000..e61aabdc538aa860ef0be76ab409f21896bcce75
--- /dev/null
+++ b/llama2_13b_peft/news_commentary_de/training_args.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:190c16756cb1d693a35940640a359e26ac9a5176a57e5fc3919e9b137d670ef2
+size 5112
diff --git a/llama2_13b_peft/news_commentary_de/training_eval_loss.png b/llama2_13b_peft/news_commentary_de/training_eval_loss.png
new file mode 100644
index 0000000000000000000000000000000000000000..82dd253be4cc097686530cf0de0f84823c1046b3
Binary files /dev/null and b/llama2_13b_peft/news_commentary_de/training_eval_loss.png differ
diff --git a/llama2_13b_peft/news_commentary_de/training_loss.png b/llama2_13b_peft/news_commentary_de/training_loss.png
new file mode 100644
index 0000000000000000000000000000000000000000..6b137300f6a1622980a55115a5f0afa9777137c6
Binary files /dev/null and b/llama2_13b_peft/news_commentary_de/training_loss.png differ
diff --git a/llama2_13b_peft/news_commentary_it/README.md b/llama2_13b_peft/news_commentary_it/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..979c7cd498f684b3d656289b32fb7036d574cd07
--- /dev/null
+++ b/llama2_13b_peft/news_commentary_it/README.md
@@ -0,0 +1,67 @@
+---
+license: other
+library_name: peft
+tags:
+- llama-factory
+- lora
+- generated_from_trainer
+base_model: /data1/model/llama2/meta-llama/Llama2-13b
+model-index:
+- name: news_commentary_it_no_sys
+ results: []
+---
+
+
+
+# news_commentary_it_no_sys
+
+This model is a fine-tuned version of Llama2-13b (loaded from the local path `/data1/model/llama2/meta-llama/Llama2-13b`) on the news_commentary_it_no_sys dataset.
+It achieves the following results on the evaluation set:
+- Loss: 0.6415
+
+## Model description
+
+More information needed
+
+## Intended uses & limitations
+
+More information needed
+
+## Training and evaluation data
+
+More information needed
+
+## Training procedure
+
+### Training hyperparameters
+
+The following hyperparameters were used during training:
+- learning_rate: 5e-05
+- train_batch_size: 4
+- eval_batch_size: 4
+- seed: 42
+- optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
+- lr_scheduler_type: cosine
+- lr_scheduler_warmup_steps: 20
+- num_epochs: 5.0
+
+### Training results
+
+| Training Loss | Epoch | Step | Validation Loss |
+|:-------------:|:------:|:-----:|:---------------:|
+| 0.7236 | 0.2807 | 2000 | 0.6710 |
+| 0.724 | 0.5614 | 4000 | 0.6521 |
+| 0.6455 | 0.8421 | 6000 | 0.6415 |
+| 0.5533 | 1.1228 | 8000 | 0.6548 |
+| 0.5192 | 1.4035 | 10000 | 0.6501 |
+| 0.4796 | 1.6842 | 12000 | 0.6500 |
+
+
+### Framework versions
+
+- PEFT 0.10.0
+- Transformers 4.40.0
+- Pytorch 2.2.1
+- Datasets 2.18.0
+- Tokenizers 0.19.1
\ No newline at end of file
diff --git a/llama2_13b_peft/news_commentary_it/adapter_config.json b/llama2_13b_peft/news_commentary_it/adapter_config.json
new file mode 100644
index 0000000000000000000000000000000000000000..e6019441aab7d98b76c7a4c89053d37c16d3508f
--- /dev/null
+++ b/llama2_13b_peft/news_commentary_it/adapter_config.json
@@ -0,0 +1,34 @@
+{
+ "alpha_pattern": {},
+ "auto_mapping": null,
+ "base_model_name_or_path": "/data1/model/llama2/meta-llama/Llama2-13b",
+ "bias": "none",
+ "fan_in_fan_out": false,
+ "inference_mode": true,
+ "init_lora_weights": true,
+ "layer_replication": null,
+ "layers_pattern": null,
+ "layers_to_transform": null,
+ "loftq_config": {},
+ "lora_alpha": 16,
+ "lora_dropout": 0.0,
+ "megatron_config": null,
+ "megatron_core": "megatron.core",
+ "modules_to_save": null,
+ "peft_type": "LORA",
+ "r": 8,
+ "rank_pattern": {},
+ "revision": null,
+ "target_modules": [
+ "down_proj",
+ "up_proj",
+ "k_proj",
+ "gate_proj",
+ "q_proj",
+ "o_proj",
+ "v_proj"
+ ],
+ "task_type": "CAUSAL_LM",
+ "use_dora": false,
+ "use_rslora": false
+}
\ No newline at end of file
diff --git a/llama2_13b_peft/news_commentary_it/adapter_model.safetensors b/llama2_13b_peft/news_commentary_it/adapter_model.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..78cab5c59f85c013d054f0930539620891acdaaf
--- /dev/null
+++ b/llama2_13b_peft/news_commentary_it/adapter_model.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:edef1dfcb7b9574ec3ab4820df092cc5ff382071e97048d16d7f69d7094885d3
+size 125248064
diff --git a/llama2_13b_peft/news_commentary_it/all_results.json b/llama2_13b_peft/news_commentary_it/all_results.json
new file mode 100644
index 0000000000000000000000000000000000000000..ab3f9de2e955481cb5f26fa6056e77bb1df25784
--- /dev/null
+++ b/llama2_13b_peft/news_commentary_it/all_results.json
@@ -0,0 +1,12 @@
+{
+ "epoch": 1.6842105263157894,
+ "eval_loss": 0.6415141820907593,
+ "eval_runtime": 119.5773,
+ "eval_samples_per_second": 12.544,
+ "eval_steps_per_second": 3.136,
+ "total_flos": 6.933368738955264e+17,
+ "train_loss": 0.6038338423768679,
+ "train_runtime": 5861.7175,
+ "train_samples_per_second": 24.31,
+ "train_steps_per_second": 6.078
+}
\ No newline at end of file
diff --git a/llama2_13b_peft/news_commentary_it/eval_results.json b/llama2_13b_peft/news_commentary_it/eval_results.json
new file mode 100644
index 0000000000000000000000000000000000000000..ccc32e68e72cfd87a3aca4b688dfd1c5f2693feb
--- /dev/null
+++ b/llama2_13b_peft/news_commentary_it/eval_results.json
@@ -0,0 +1,7 @@
+{
+ "epoch": 1.6842105263157894,
+ "eval_loss": 0.6415141820907593,
+ "eval_runtime": 119.5773,
+ "eval_samples_per_second": 12.544,
+ "eval_steps_per_second": 3.136
+}
\ No newline at end of file
diff --git a/llama2_13b_peft/news_commentary_it/special_tokens_map.json b/llama2_13b_peft/news_commentary_it/special_tokens_map.json
new file mode 100644
index 0000000000000000000000000000000000000000..72ecfeeb7e14d244c936169d2ed139eeae235ef1
--- /dev/null
+++ b/llama2_13b_peft/news_commentary_it/special_tokens_map.json
@@ -0,0 +1,24 @@
+{
+ "bos_token": {
+ "content": "<s>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false
+ },
+ "eos_token": {
+ "content": "</s>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false
+ },
+ "pad_token": "",
+ "unk_token": {
+ "content": "<unk>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false
+ }
+}
diff --git a/llama2_13b_peft/news_commentary_it/tokenizer.model b/llama2_13b_peft/news_commentary_it/tokenizer.model
new file mode 100644
index 0000000000000000000000000000000000000000..6c00c742ce03c627d6cd5b795984876fa49fa899
--- /dev/null
+++ b/llama2_13b_peft/news_commentary_it/tokenizer.model
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9e556afd44213b6bd1be2b850ebbbd98f5481437a8021afaf58ee7fb1818d347
+size 499723
diff --git a/llama2_13b_peft/news_commentary_it/tokenizer_config.json b/llama2_13b_peft/news_commentary_it/tokenizer_config.json
new file mode 100644
index 0000000000000000000000000000000000000000..a40266f39e5b5fed14de34710d35eb9e98d6bdad
--- /dev/null
+++ b/llama2_13b_peft/news_commentary_it/tokenizer_config.json
@@ -0,0 +1,45 @@
+{
+ "add_bos_token": true,
+ "add_eos_token": false,
+ "add_prefix_space": true,
+ "added_tokens_decoder": {
+ "0": {
+ "content": "<unk>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "1": {
+ "content": "<s>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "2": {
+ "content": "</s>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ }
+ },
+ "bos_token": "<s>",
+ "chat_template": "{% if messages[0]['role'] == 'system' %}{% set system_message = messages[0]['content'] %}{% endif %}{% if system_message is defined %}{{ system_message }}{% endif %}{% for message in messages %}{% set content = message['content'] %}{% if message['role'] == 'user' %}{{ content }}{% elif message['role'] == 'assistant' %}{{ content + '\\n' }}{% endif %}{% endfor %}",
+ "clean_up_tokenization_spaces": false,
+ "eos_token": "</s>",
+ "legacy": true,
+ "model_max_length": 1000000000000000019884624838656,
+ "pad_token": "",
+ "padding_side": "right",
+ "sp_model_kwargs": {},
+ "spaces_between_special_tokens": false,
+ "split_special_tokens": false,
+ "tokenizer_class": "LlamaTokenizer",
+ "unk_token": "<unk>",
+ "use_default_system_prompt": false
+}
diff --git a/llama2_13b_peft/news_commentary_it/train_results.json b/llama2_13b_peft/news_commentary_it/train_results.json
new file mode 100644
index 0000000000000000000000000000000000000000..d54650ad4153608c2e0fd8787c597430eebf1c9c
--- /dev/null
+++ b/llama2_13b_peft/news_commentary_it/train_results.json
@@ -0,0 +1,8 @@
+{
+ "epoch": 1.6842105263157894,
+ "total_flos": 6.933368738955264e+17,
+ "train_loss": 0.6038338423768679,
+ "train_runtime": 5861.7175,
+ "train_samples_per_second": 24.31,
+ "train_steps_per_second": 6.078
+}
\ No newline at end of file
diff --git a/llama2_13b_peft/news_commentary_it/trainer_log.jsonl b/llama2_13b_peft/news_commentary_it/trainer_log.jsonl
new file mode 100644
index 0000000000000000000000000000000000000000..a78faae2e7efd81f213aed28ad03e19246d508ef
--- /dev/null
+++ b/llama2_13b_peft/news_commentary_it/trainer_log.jsonl
@@ -0,0 +1,1208 @@
+{"current_steps": 10, "total_steps": 35625, "loss": 1.3423, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 2.5e-05, "epoch": 0.0014035087719298245, "percentage": 0.03, "elapsed_time": "0:00:05", "remaining_time": "5:16:28"}
+{"current_steps": 20, "total_steps": 35625, "loss": 1.3855, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 5e-05, "epoch": 0.002807017543859649, "percentage": 0.06, "elapsed_time": "0:00:08", "remaining_time": "4:19:15"}
+{"current_steps": 30, "total_steps": 35625, "loss": 0.9621, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.999999026832157e-05, "epoch": 0.004210526315789474, "percentage": 0.08, "elapsed_time": "0:00:12", "remaining_time": "4:01:34"}
+{"current_steps": 40, "total_steps": 35625, "loss": 0.8217, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.9999961073293845e-05, "epoch": 0.005614035087719298, "percentage": 0.11, "elapsed_time": "0:00:15", "remaining_time": "3:47:11"}
+{"current_steps": 50, "total_steps": 35625, "loss": 0.7743, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.9999912414939555e-05, "epoch": 0.007017543859649123, "percentage": 0.14, "elapsed_time": "0:00:20", "remaining_time": "4:02:42"}
+{"current_steps": 60, "total_steps": 35625, "loss": 0.7671, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.9999844293296585e-05, "epoch": 0.008421052631578947, "percentage": 0.17, "elapsed_time": "0:00:25", "remaining_time": "4:07:49"}
+{"current_steps": 70, "total_steps": 35625, "loss": 0.7657, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.999975670841798e-05, "epoch": 0.009824561403508772, "percentage": 0.2, "elapsed_time": "0:00:28", "remaining_time": "3:59:45"}
+{"current_steps": 80, "total_steps": 35625, "loss": 0.6544, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.9999649660371906e-05, "epoch": 0.011228070175438596, "percentage": 0.22, "elapsed_time": "0:00:31", "remaining_time": "3:53:14"}
+{"current_steps": 90, "total_steps": 35625, "loss": 0.7627, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.9999523149241714e-05, "epoch": 0.01263157894736842, "percentage": 0.25, "elapsed_time": "0:00:35", "remaining_time": "3:55:41"}
+{"current_steps": 100, "total_steps": 35625, "loss": 0.7428, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.99993771751259e-05, "epoch": 0.014035087719298246, "percentage": 0.28, "elapsed_time": "0:00:39", "remaining_time": "3:52:59"}
+{"current_steps": 110, "total_steps": 35625, "loss": 0.7024, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.999921173813812e-05, "epoch": 0.015438596491228071, "percentage": 0.31, "elapsed_time": "0:00:42", "remaining_time": "3:49:59"}
+{"current_steps": 120, "total_steps": 35625, "loss": 0.8205, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.999902683840715e-05, "epoch": 0.016842105263157894, "percentage": 0.34, "elapsed_time": "0:00:46", "remaining_time": "3:48:34"}
+{"current_steps": 130, "total_steps": 35625, "loss": 0.7359, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.9998822476076955e-05, "epoch": 0.018245614035087718, "percentage": 0.36, "elapsed_time": "0:00:51", "remaining_time": "3:54:03"}
+{"current_steps": 140, "total_steps": 35625, "loss": 0.6837, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.999859865130664e-05, "epoch": 0.019649122807017545, "percentage": 0.39, "elapsed_time": "0:00:54", "remaining_time": "3:50:53"}
+{"current_steps": 150, "total_steps": 35625, "loss": 0.7091, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.9998355364270445e-05, "epoch": 0.021052631578947368, "percentage": 0.42, "elapsed_time": "0:00:57", "remaining_time": "3:47:49"}
+{"current_steps": 160, "total_steps": 35625, "loss": 0.7608, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.999809261515779e-05, "epoch": 0.02245614035087719, "percentage": 0.45, "elapsed_time": "0:01:00", "remaining_time": "3:44:14"}
+{"current_steps": 170, "total_steps": 35625, "loss": 0.7725, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.9997810404173234e-05, "epoch": 0.023859649122807018, "percentage": 0.48, "elapsed_time": "0:01:04", "remaining_time": "3:44:41"}
+{"current_steps": 180, "total_steps": 35625, "loss": 0.7884, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.999750873153648e-05, "epoch": 0.02526315789473684, "percentage": 0.51, "elapsed_time": "0:01:10", "remaining_time": "3:50:58"}
+{"current_steps": 190, "total_steps": 35625, "loss": 0.7266, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.9997187597482405e-05, "epoch": 0.02666666666666667, "percentage": 0.53, "elapsed_time": "0:01:13", "remaining_time": "3:48:23"}
+{"current_steps": 200, "total_steps": 35625, "loss": 0.6825, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.9996847002261006e-05, "epoch": 0.028070175438596492, "percentage": 0.56, "elapsed_time": "0:01:16", "remaining_time": "3:46:53"}
+{"current_steps": 210, "total_steps": 35625, "loss": 0.6162, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.999648694613746e-05, "epoch": 0.029473684210526315, "percentage": 0.59, "elapsed_time": "0:01:20", "remaining_time": "3:46:31"}
+{"current_steps": 220, "total_steps": 35625, "loss": 0.6696, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.9996107429392083e-05, "epoch": 0.030877192982456142, "percentage": 0.62, "elapsed_time": "0:01:23", "remaining_time": "3:44:22"}
+{"current_steps": 230, "total_steps": 35625, "loss": 0.7512, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.9995708452320325e-05, "epoch": 0.032280701754385965, "percentage": 0.65, "elapsed_time": "0:01:26", "remaining_time": "3:42:35"}
+{"current_steps": 240, "total_steps": 35625, "loss": 0.7137, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.999529001523282e-05, "epoch": 0.03368421052631579, "percentage": 0.67, "elapsed_time": "0:01:29", "remaining_time": "3:40:42"}
+{"current_steps": 250, "total_steps": 35625, "loss": 0.7676, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.9994852118455335e-05, "epoch": 0.03508771929824561, "percentage": 0.7, "elapsed_time": "0:01:33", "remaining_time": "3:40:52"}
+{"current_steps": 260, "total_steps": 35625, "loss": 0.7208, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.9994394762328786e-05, "epoch": 0.036491228070175435, "percentage": 0.73, "elapsed_time": "0:01:38", "remaining_time": "3:43:28"}
+{"current_steps": 270, "total_steps": 35625, "loss": 0.7029, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.999391794720923e-05, "epoch": 0.037894736842105266, "percentage": 0.76, "elapsed_time": "0:01:41", "remaining_time": "3:41:42"}
+{"current_steps": 280, "total_steps": 35625, "loss": 0.6751, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.9993421673467906e-05, "epoch": 0.03929824561403509, "percentage": 0.79, "elapsed_time": "0:01:45", "remaining_time": "3:42:22"}
+{"current_steps": 290, "total_steps": 35625, "loss": 0.7652, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.9992905941491155e-05, "epoch": 0.04070175438596491, "percentage": 0.81, "elapsed_time": "0:01:49", "remaining_time": "3:43:20"}
+{"current_steps": 300, "total_steps": 35625, "loss": 0.7609, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.9992370751680514e-05, "epoch": 0.042105263157894736, "percentage": 0.84, "elapsed_time": "0:01:53", "remaining_time": "3:42:16"}
+{"current_steps": 310, "total_steps": 35625, "loss": 0.678, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.999181610445263e-05, "epoch": 0.04350877192982456, "percentage": 0.87, "elapsed_time": "0:01:56", "remaining_time": "3:41:36"}
+{"current_steps": 320, "total_steps": 35625, "loss": 0.7089, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.9991242000239316e-05, "epoch": 0.04491228070175438, "percentage": 0.9, "elapsed_time": "0:01:59", "remaining_time": "3:40:24"}
+{"current_steps": 330, "total_steps": 35625, "loss": 0.7034, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.9990648439487544e-05, "epoch": 0.04631578947368421, "percentage": 0.93, "elapsed_time": "0:02:03", "remaining_time": "3:39:28"}
+{"current_steps": 340, "total_steps": 35625, "loss": 0.6789, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.999003542265941e-05, "epoch": 0.047719298245614036, "percentage": 0.95, "elapsed_time": "0:02:07", "remaining_time": "3:40:53"}
+{"current_steps": 350, "total_steps": 35625, "loss": 0.6895, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.998940295023218e-05, "epoch": 0.04912280701754386, "percentage": 0.98, "elapsed_time": "0:02:11", "remaining_time": "3:41:23"}
+{"current_steps": 360, "total_steps": 35625, "loss": 0.7472, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.9988751022698244e-05, "epoch": 0.05052631578947368, "percentage": 1.01, "elapsed_time": "0:02:16", "remaining_time": "3:42:40"}
+{"current_steps": 370, "total_steps": 35625, "loss": 0.7637, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.9988079640565155e-05, "epoch": 0.051929824561403506, "percentage": 1.04, "elapsed_time": "0:02:21", "remaining_time": "3:43:59"}
+{"current_steps": 380, "total_steps": 35625, "loss": 0.8042, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.998738880435561e-05, "epoch": 0.05333333333333334, "percentage": 1.07, "elapsed_time": "0:02:24", "remaining_time": "3:42:49"}
+{"current_steps": 390, "total_steps": 35625, "loss": 0.7878, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.9986678514607434e-05, "epoch": 0.05473684210526316, "percentage": 1.09, "elapsed_time": "0:02:27", "remaining_time": "3:41:40"}
+{"current_steps": 400, "total_steps": 35625, "loss": 0.6831, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.998594877187362e-05, "epoch": 0.056140350877192984, "percentage": 1.12, "elapsed_time": "0:02:30", "remaining_time": "3:41:15"}
+{"current_steps": 410, "total_steps": 35625, "loss": 0.7905, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.998519957672232e-05, "epoch": 0.05754385964912281, "percentage": 1.15, "elapsed_time": "0:02:35", "remaining_time": "3:43:17"}
+{"current_steps": 420, "total_steps": 35625, "loss": 0.6195, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.998443092973678e-05, "epoch": 0.05894736842105263, "percentage": 1.18, "elapsed_time": "0:02:39", "remaining_time": "3:42:34"}
+{"current_steps": 430, "total_steps": 35625, "loss": 0.7603, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.998364283151542e-05, "epoch": 0.060350877192982454, "percentage": 1.21, "elapsed_time": "0:02:43", "remaining_time": "3:42:29"}
+{"current_steps": 440, "total_steps": 35625, "loss": 0.7099, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.9982835282671816e-05, "epoch": 0.061754385964912284, "percentage": 1.24, "elapsed_time": "0:02:47", "remaining_time": "3:43:04"}
+{"current_steps": 450, "total_steps": 35625, "loss": 0.6307, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.998200828383466e-05, "epoch": 0.06315789473684211, "percentage": 1.26, "elapsed_time": "0:02:51", "remaining_time": "3:43:01"}
+{"current_steps": 460, "total_steps": 35625, "loss": 0.7084, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.99811618356478e-05, "epoch": 0.06456140350877193, "percentage": 1.29, "elapsed_time": "0:02:54", "remaining_time": "3:42:32"}
+{"current_steps": 470, "total_steps": 35625, "loss": 0.6897, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.998029593877025e-05, "epoch": 0.06596491228070175, "percentage": 1.32, "elapsed_time": "0:02:59", "remaining_time": "3:43:30"}
+{"current_steps": 480, "total_steps": 35625, "loss": 0.7054, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.9979410593876096e-05, "epoch": 0.06736842105263158, "percentage": 1.35, "elapsed_time": "0:03:03", "remaining_time": "3:43:29"}
+{"current_steps": 490, "total_steps": 35625, "loss": 0.645, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.997850580165464e-05, "epoch": 0.0687719298245614, "percentage": 1.38, "elapsed_time": "0:03:07", "remaining_time": "3:44:32"}
+{"current_steps": 500, "total_steps": 35625, "loss": 0.6455, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.997758156281029e-05, "epoch": 0.07017543859649122, "percentage": 1.4, "elapsed_time": "0:03:12", "remaining_time": "3:44:50"}
+{"current_steps": 510, "total_steps": 35625, "loss": 0.6797, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.997663787806259e-05, "epoch": 0.07157894736842105, "percentage": 1.43, "elapsed_time": "0:03:16", "remaining_time": "3:45:49"}
+{"current_steps": 520, "total_steps": 35625, "loss": 0.7582, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.997567474814623e-05, "epoch": 0.07298245614035087, "percentage": 1.46, "elapsed_time": "0:03:21", "remaining_time": "3:46:20"}
+{"current_steps": 530, "total_steps": 35625, "loss": 0.667, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.997469217381105e-05, "epoch": 0.07438596491228071, "percentage": 1.49, "elapsed_time": "0:03:24", "remaining_time": "3:45:34"}
+{"current_steps": 540, "total_steps": 35625, "loss": 0.6878, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.997369015582201e-05, "epoch": 0.07578947368421053, "percentage": 1.52, "elapsed_time": "0:03:28", "remaining_time": "3:45:15"}
+{"current_steps": 550, "total_steps": 35625, "loss": 0.7693, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.9972668694959216e-05, "epoch": 0.07719298245614035, "percentage": 1.54, "elapsed_time": "0:03:31", "remaining_time": "3:44:42"}
+{"current_steps": 560, "total_steps": 35625, "loss": 0.561, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.9971627792017915e-05, "epoch": 0.07859649122807018, "percentage": 1.57, "elapsed_time": "0:03:35", "remaining_time": "3:44:43"}
+{"current_steps": 570, "total_steps": 35625, "loss": 0.6739, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.997056744780848e-05, "epoch": 0.08, "percentage": 1.6, "elapsed_time": "0:03:38", "remaining_time": "3:44:27"}
+{"current_steps": 580, "total_steps": 35625, "loss": 0.6561, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.9969487663156434e-05, "epoch": 0.08140350877192983, "percentage": 1.63, "elapsed_time": "0:03:42", "remaining_time": "3:44:24"}
+{"current_steps": 590, "total_steps": 35625, "loss": 0.6056, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.9968388438902415e-05, "epoch": 0.08280701754385965, "percentage": 1.66, "elapsed_time": "0:03:46", "remaining_time": "3:44:08"}
+{"current_steps": 600, "total_steps": 35625, "loss": 0.6962, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.9967269775902204e-05, "epoch": 0.08421052631578947, "percentage": 1.68, "elapsed_time": "0:03:51", "remaining_time": "3:45:27"}
+{"current_steps": 610, "total_steps": 35625, "loss": 0.6174, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.996613167502674e-05, "epoch": 0.0856140350877193, "percentage": 1.71, "elapsed_time": "0:03:55", "remaining_time": "3:44:58"}
+{"current_steps": 620, "total_steps": 35625, "loss": 0.6806, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.996497413716205e-05, "epoch": 0.08701754385964912, "percentage": 1.74, "elapsed_time": "0:03:58", "remaining_time": "3:44:53"}
+{"current_steps": 630, "total_steps": 35625, "loss": 0.7618, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.996379716320933e-05, "epoch": 0.08842105263157894, "percentage": 1.77, "elapsed_time": "0:04:03", "remaining_time": "3:45:05"}
+{"current_steps": 640, "total_steps": 35625, "loss": 0.6796, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.996260075408489e-05, "epoch": 0.08982456140350877, "percentage": 1.8, "elapsed_time": "0:04:06", "remaining_time": "3:44:26"}
+{"current_steps": 650, "total_steps": 35625, "loss": 0.6249, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.996138491072018e-05, "epoch": 0.0912280701754386, "percentage": 1.82, "elapsed_time": "0:04:10", "remaining_time": "3:44:22"}
+{"current_steps": 660, "total_steps": 35625, "loss": 0.6905, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.996014963406177e-05, "epoch": 0.09263157894736843, "percentage": 1.85, "elapsed_time": "0:04:13", "remaining_time": "3:44:05"}
+{"current_steps": 670, "total_steps": 35625, "loss": 0.7082, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.9958894925071364e-05, "epoch": 0.09403508771929825, "percentage": 1.88, "elapsed_time": "0:04:16", "remaining_time": "3:43:15"}
+{"current_steps": 680, "total_steps": 35625, "loss": 0.6751, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.995762078472581e-05, "epoch": 0.09543859649122807, "percentage": 1.91, "elapsed_time": "0:04:20", "remaining_time": "3:42:48"}
+{"current_steps": 690, "total_steps": 35625, "loss": 0.6223, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.995632721401705e-05, "epoch": 0.0968421052631579, "percentage": 1.94, "elapsed_time": "0:04:23", "remaining_time": "3:42:06"}
+{"current_steps": 700, "total_steps": 35625, "loss": 0.622, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.995501421395219e-05, "epoch": 0.09824561403508772, "percentage": 1.96, "elapsed_time": "0:04:27", "remaining_time": "3:42:15"}
+{"current_steps": 710, "total_steps": 35625, "loss": 0.7565, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.995368178555343e-05, "epoch": 0.09964912280701754, "percentage": 1.99, "elapsed_time": "0:04:30", "remaining_time": "3:41:31"}
+{"current_steps": 720, "total_steps": 35625, "loss": 0.7486, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.9952329929858125e-05, "epoch": 0.10105263157894737, "percentage": 2.02, "elapsed_time": "0:04:33", "remaining_time": "3:40:55"}
+{"current_steps": 730, "total_steps": 35625, "loss": 0.6825, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.995095864791873e-05, "epoch": 0.10245614035087719, "percentage": 2.05, "elapsed_time": "0:04:37", "remaining_time": "3:41:14"}
+{"current_steps": 740, "total_steps": 35625, "loss": 0.7342, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.994956794080285e-05, "epoch": 0.10385964912280701, "percentage": 2.08, "elapsed_time": "0:04:43", "remaining_time": "3:42:22"}
+{"current_steps": 750, "total_steps": 35625, "loss": 0.6289, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.994815780959318e-05, "epoch": 0.10526315789473684, "percentage": 2.11, "elapsed_time": "0:04:48", "remaining_time": "3:43:24"}
+{"current_steps": 760, "total_steps": 35625, "loss": 0.5675, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.994672825538757e-05, "epoch": 0.10666666666666667, "percentage": 2.13, "elapsed_time": "0:04:52", "remaining_time": "3:43:26"}
+{"current_steps": 770, "total_steps": 35625, "loss": 0.7527, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.994527927929897e-05, "epoch": 0.1080701754385965, "percentage": 2.16, "elapsed_time": "0:04:56", "remaining_time": "3:43:20"}
+{"current_steps": 780, "total_steps": 35625, "loss": 0.6421, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.9943810882455454e-05, "epoch": 0.10947368421052632, "percentage": 2.19, "elapsed_time": "0:04:59", "remaining_time": "3:43:14"}
+{"current_steps": 790, "total_steps": 35625, "loss": 0.7016, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.994232306600023e-05, "epoch": 0.11087719298245614, "percentage": 2.22, "elapsed_time": "0:05:04", "remaining_time": "3:43:41"}
+{"current_steps": 800, "total_steps": 35625, "loss": 0.6911, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.99408158310916e-05, "epoch": 0.11228070175438597, "percentage": 2.25, "elapsed_time": "0:05:07", "remaining_time": "3:43:21"}
+{"current_steps": 810, "total_steps": 35625, "loss": 0.697, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.9939289178903016e-05, "epoch": 0.11368421052631579, "percentage": 2.27, "elapsed_time": "0:05:11", "remaining_time": "3:43:19"}
+{"current_steps": 820, "total_steps": 35625, "loss": 0.5691, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.993774311062301e-05, "epoch": 0.11508771929824561, "percentage": 2.3, "elapsed_time": "0:05:14", "remaining_time": "3:42:43"}
+{"current_steps": 830, "total_steps": 35625, "loss": 0.7744, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.993617762745526e-05, "epoch": 0.11649122807017544, "percentage": 2.33, "elapsed_time": "0:05:17", "remaining_time": "3:42:09"}
+{"current_steps": 840, "total_steps": 35625, "loss": 0.7652, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.993459273061855e-05, "epoch": 0.11789473684210526, "percentage": 2.36, "elapsed_time": "0:05:21", "remaining_time": "3:41:36"}
+{"current_steps": 850, "total_steps": 35625, "loss": 0.6843, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.993298842134677e-05, "epoch": 0.11929824561403508, "percentage": 2.39, "elapsed_time": "0:05:24", "remaining_time": "3:41:30"}
+{"current_steps": 860, "total_steps": 35625, "loss": 0.7147, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.993136470088894e-05, "epoch": 0.12070175438596491, "percentage": 2.41, "elapsed_time": "0:05:28", "remaining_time": "3:41:27"}
+{"current_steps": 870, "total_steps": 35625, "loss": 0.641, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.992972157050916e-05, "epoch": 0.12210526315789473, "percentage": 2.44, "elapsed_time": "0:05:32", "remaining_time": "3:41:41"}
+{"current_steps": 880, "total_steps": 35625, "loss": 0.6212, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.992805903148669e-05, "epoch": 0.12350877192982457, "percentage": 2.47, "elapsed_time": "0:05:35", "remaining_time": "3:41:01"}
+{"current_steps": 890, "total_steps": 35625, "loss": 0.6817, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.992637708511586e-05, "epoch": 0.12491228070175439, "percentage": 2.5, "elapsed_time": "0:05:40", "remaining_time": "3:41:17"}
+{"current_steps": 900, "total_steps": 35625, "loss": 0.7072, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.9924675732706123e-05, "epoch": 0.12631578947368421, "percentage": 2.53, "elapsed_time": "0:05:44", "remaining_time": "3:41:15"}
+{"current_steps": 910, "total_steps": 35625, "loss": 0.6221, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.992295497558204e-05, "epoch": 0.12771929824561404, "percentage": 2.55, "elapsed_time": "0:05:49", "remaining_time": "3:42:02"}
+{"current_steps": 920, "total_steps": 35625, "loss": 0.6162, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.992121481508328e-05, "epoch": 0.12912280701754386, "percentage": 2.58, "elapsed_time": "0:05:52", "remaining_time": "3:41:36"}
+{"current_steps": 930, "total_steps": 35625, "loss": 0.7548, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.9919455252564624e-05, "epoch": 0.13052631578947368, "percentage": 2.61, "elapsed_time": "0:05:55", "remaining_time": "3:41:12"}
+{"current_steps": 940, "total_steps": 35625, "loss": 0.6377, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.991767628939594e-05, "epoch": 0.1319298245614035, "percentage": 2.64, "elapsed_time": "0:05:58", "remaining_time": "3:40:42"}
+{"current_steps": 950, "total_steps": 35625, "loss": 0.6971, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.991587792696223e-05, "epoch": 0.13333333333333333, "percentage": 2.67, "elapsed_time": "0:06:03", "remaining_time": "3:41:04"}
+{"current_steps": 960, "total_steps": 35625, "loss": 0.7929, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.991406016666356e-05, "epoch": 0.13473684210526315, "percentage": 2.69, "elapsed_time": "0:06:06", "remaining_time": "3:40:29"}
+{"current_steps": 970, "total_steps": 35625, "loss": 0.7556, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.9912223009915126e-05, "epoch": 0.13614035087719298, "percentage": 2.72, "elapsed_time": "0:06:09", "remaining_time": "3:39:56"}
+{"current_steps": 980, "total_steps": 35625, "loss": 0.5883, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.991036645814722e-05, "epoch": 0.1375438596491228, "percentage": 2.75, "elapsed_time": "0:06:12", "remaining_time": "3:39:28"}
+{"current_steps": 990, "total_steps": 35625, "loss": 0.6362, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.9908490512805236e-05, "epoch": 0.13894736842105262, "percentage": 2.78, "elapsed_time": "0:06:16", "remaining_time": "3:39:41"}
+{"current_steps": 1000, "total_steps": 35625, "loss": 0.7057, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.990659517534966e-05, "epoch": 0.14035087719298245, "percentage": 2.81, "elapsed_time": "0:06:21", "remaining_time": "3:39:52"}
+{"current_steps": 1010, "total_steps": 35625, "loss": 0.6051, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.990468044725606e-05, "epoch": 0.14175438596491227, "percentage": 2.84, "elapsed_time": "0:06:25", "remaining_time": "3:40:04"}
+{"current_steps": 1020, "total_steps": 35625, "loss": 0.7434, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.990274633001514e-05, "epoch": 0.1431578947368421, "percentage": 2.86, "elapsed_time": "0:06:28", "remaining_time": "3:39:42"}
+{"current_steps": 1030, "total_steps": 35625, "loss": 0.6681, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.990079282513266e-05, "epoch": 0.14456140350877192, "percentage": 2.89, "elapsed_time": "0:06:32", "remaining_time": "3:39:49"}
+{"current_steps": 1040, "total_steps": 35625, "loss": 0.6655, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.9898819934129506e-05, "epoch": 0.14596491228070174, "percentage": 2.92, "elapsed_time": "0:06:38", "remaining_time": "3:40:37"}
+{"current_steps": 1050, "total_steps": 35625, "loss": 0.7529, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.989682765854163e-05, "epoch": 0.14736842105263157, "percentage": 2.95, "elapsed_time": "0:06:41", "remaining_time": "3:40:26"}
+{"current_steps": 1060, "total_steps": 35625, "loss": 0.6249, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.989481599992009e-05, "epoch": 0.14877192982456142, "percentage": 2.98, "elapsed_time": "0:06:44", "remaining_time": "3:39:53"}
+{"current_steps": 1070, "total_steps": 35625, "loss": 0.6437, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.989278495983103e-05, "epoch": 0.15017543859649124, "percentage": 3.0, "elapsed_time": "0:06:48", "remaining_time": "3:40:05"}
+{"current_steps": 1080, "total_steps": 35625, "loss": 0.6206, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.989073453985569e-05, "epoch": 0.15157894736842106, "percentage": 3.03, "elapsed_time": "0:06:52", "remaining_time": "3:40:05"}
+{"current_steps": 1090, "total_steps": 35625, "loss": 0.6141, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.988866474159037e-05, "epoch": 0.1529824561403509, "percentage": 3.06, "elapsed_time": "0:06:56", "remaining_time": "3:39:51"}
+{"current_steps": 1100, "total_steps": 35625, "loss": 0.6653, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.988657556664652e-05, "epoch": 0.1543859649122807, "percentage": 3.09, "elapsed_time": "0:06:59", "remaining_time": "3:39:34"}
+{"current_steps": 1110, "total_steps": 35625, "loss": 0.6503, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.98844670166506e-05, "epoch": 0.15578947368421053, "percentage": 3.12, "elapsed_time": "0:07:03", "remaining_time": "3:39:15"}
+{"current_steps": 1120, "total_steps": 35625, "loss": 0.6298, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.98823390932442e-05, "epoch": 0.15719298245614036, "percentage": 3.14, "elapsed_time": "0:07:06", "remaining_time": "3:38:51"}
+{"current_steps": 1130, "total_steps": 35625, "loss": 0.6889, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.988019179808398e-05, "epoch": 0.15859649122807018, "percentage": 3.17, "elapsed_time": "0:07:10", "remaining_time": "3:38:49"}
+{"current_steps": 1140, "total_steps": 35625, "loss": 0.7086, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.987802513284169e-05, "epoch": 0.16, "percentage": 3.2, "elapsed_time": "0:07:14", "remaining_time": "3:39:04"}
+{"current_steps": 1150, "total_steps": 35625, "loss": 0.6727, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.9875839099204134e-05, "epoch": 0.16140350877192983, "percentage": 3.23, "elapsed_time": "0:07:18", "remaining_time": "3:39:09"}
+{"current_steps": 1160, "total_steps": 35625, "loss": 0.6993, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.987363369887324e-05, "epoch": 0.16280701754385965, "percentage": 3.26, "elapsed_time": "0:07:22", "remaining_time": "3:39:05"}
+{"current_steps": 1170, "total_steps": 35625, "loss": 0.5671, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.987140893356597e-05, "epoch": 0.16421052631578947, "percentage": 3.28, "elapsed_time": "0:07:26", "remaining_time": "3:39:21"}
+{"current_steps": 1180, "total_steps": 35625, "loss": 0.6929, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.986916480501438e-05, "epoch": 0.1656140350877193, "percentage": 3.31, "elapsed_time": "0:07:31", "remaining_time": "3:39:29"}
+{"current_steps": 1190, "total_steps": 35625, "loss": 0.5368, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.986690131496561e-05, "epoch": 0.16701754385964912, "percentage": 3.34, "elapsed_time": "0:07:34", "remaining_time": "3:39:07"}
+{"current_steps": 1200, "total_steps": 35625, "loss": 0.6473, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.986461846518186e-05, "epoch": 0.16842105263157894, "percentage": 3.37, "elapsed_time": "0:07:40", "remaining_time": "3:39:57"}
+{"current_steps": 1210, "total_steps": 35625, "loss": 0.6698, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.986231625744041e-05, "epoch": 0.16982456140350877, "percentage": 3.4, "elapsed_time": "0:07:44", "remaining_time": "3:40:09"}
+{"current_steps": 1220, "total_steps": 35625, "loss": 0.6747, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.985999469353359e-05, "epoch": 0.1712280701754386, "percentage": 3.42, "elapsed_time": "0:07:48", "remaining_time": "3:40:01"}
+{"current_steps": 1230, "total_steps": 35625, "loss": 0.6509, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.9857653775268853e-05, "epoch": 0.1726315789473684, "percentage": 3.45, "elapsed_time": "0:07:52", "remaining_time": "3:40:12"}
+{"current_steps": 1240, "total_steps": 35625, "loss": 0.6895, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.985529350446865e-05, "epoch": 0.17403508771929824, "percentage": 3.48, "elapsed_time": "0:07:57", "remaining_time": "3:40:38"}
+{"current_steps": 1250, "total_steps": 35625, "loss": 0.5882, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.985291388297055e-05, "epoch": 0.17543859649122806, "percentage": 3.51, "elapsed_time": "0:08:01", "remaining_time": "3:40:51"}
+{"current_steps": 1260, "total_steps": 35625, "loss": 0.6688, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.985051491262716e-05, "epoch": 0.17684210526315788, "percentage": 3.54, "elapsed_time": "0:08:07", "remaining_time": "3:41:32"}
+{"current_steps": 1270, "total_steps": 35625, "loss": 0.6135, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.984809659530617e-05, "epoch": 0.1782456140350877, "percentage": 3.56, "elapsed_time": "0:08:11", "remaining_time": "3:41:30"}
+{"current_steps": 1280, "total_steps": 35625, "loss": 0.6849, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.9845658932890315e-05, "epoch": 0.17964912280701753, "percentage": 3.59, "elapsed_time": "0:08:14", "remaining_time": "3:41:04"}
+{"current_steps": 1290, "total_steps": 35625, "loss": 0.6036, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.9843201927277407e-05, "epoch": 0.18105263157894738, "percentage": 3.62, "elapsed_time": "0:08:17", "remaining_time": "3:40:40"}
+{"current_steps": 1300, "total_steps": 35625, "loss": 0.7348, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.984072558038031e-05, "epoch": 0.1824561403508772, "percentage": 3.65, "elapsed_time": "0:08:21", "remaining_time": "3:40:42"}
+{"current_steps": 1310, "total_steps": 35625, "loss": 0.7679, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.983822989412693e-05, "epoch": 0.18385964912280703, "percentage": 3.68, "elapsed_time": "0:08:26", "remaining_time": "3:40:58"}
+{"current_steps": 1320, "total_steps": 35625, "loss": 0.7083, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.983571487046026e-05, "epoch": 0.18526315789473685, "percentage": 3.71, "elapsed_time": "0:08:31", "remaining_time": "3:41:29"}
+{"current_steps": 1330, "total_steps": 35625, "loss": 0.6417, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.9833180511338314e-05, "epoch": 0.18666666666666668, "percentage": 3.73, "elapsed_time": "0:08:34", "remaining_time": "3:41:03"}
+{"current_steps": 1340, "total_steps": 35625, "loss": 0.6817, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.983062681873421e-05, "epoch": 0.1880701754385965, "percentage": 3.76, "elapsed_time": "0:08:37", "remaining_time": "3:40:45"}
+{"current_steps": 1350, "total_steps": 35625, "loss": 0.6554, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.982805379463605e-05, "epoch": 0.18947368421052632, "percentage": 3.79, "elapsed_time": "0:08:40", "remaining_time": "3:40:16"}
+{"current_steps": 1360, "total_steps": 35625, "loss": 0.6613, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.982546144104704e-05, "epoch": 0.19087719298245615, "percentage": 3.82, "elapsed_time": "0:08:45", "remaining_time": "3:40:32"}
+{"current_steps": 1370, "total_steps": 35625, "loss": 0.6902, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.982284975998541e-05, "epoch": 0.19228070175438597, "percentage": 3.85, "elapsed_time": "0:08:48", "remaining_time": "3:40:13"}
+{"current_steps": 1380, "total_steps": 35625, "loss": 0.81, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.982021875348445e-05, "epoch": 0.1936842105263158, "percentage": 3.87, "elapsed_time": "0:08:52", "remaining_time": "3:40:19"}
+{"current_steps": 1390, "total_steps": 35625, "loss": 0.6796, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.9817568423592484e-05, "epoch": 0.19508771929824562, "percentage": 3.9, "elapsed_time": "0:08:56", "remaining_time": "3:40:21"}
+{"current_steps": 1400, "total_steps": 35625, "loss": 0.6109, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.981489877237288e-05, "epoch": 0.19649122807017544, "percentage": 3.93, "elapsed_time": "0:09:00", "remaining_time": "3:40:04"}
+{"current_steps": 1410, "total_steps": 35625, "loss": 0.7521, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.9812209801904064e-05, "epoch": 0.19789473684210526, "percentage": 3.96, "elapsed_time": "0:09:04", "remaining_time": "3:40:09"}
+{"current_steps": 1420, "total_steps": 35625, "loss": 0.6742, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.980950151427948e-05, "epoch": 0.19929824561403509, "percentage": 3.99, "elapsed_time": "0:09:08", "remaining_time": "3:40:04"}
+{"current_steps": 1430, "total_steps": 35625, "loss": 0.659, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.980677391160763e-05, "epoch": 0.2007017543859649, "percentage": 4.01, "elapsed_time": "0:09:11", "remaining_time": "3:39:41"}
+{"current_steps": 1440, "total_steps": 35625, "loss": 0.6392, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.980402699601205e-05, "epoch": 0.20210526315789473, "percentage": 4.04, "elapsed_time": "0:09:15", "remaining_time": "3:39:42"}
+{"current_steps": 1450, "total_steps": 35625, "loss": 0.6674, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.98012607696313e-05, "epoch": 0.20350877192982456, "percentage": 4.07, "elapsed_time": "0:09:20", "remaining_time": "3:40:00"}
+{"current_steps": 1460, "total_steps": 35625, "loss": 0.6772, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.979847523461898e-05, "epoch": 0.20491228070175438, "percentage": 4.1, "elapsed_time": "0:09:24", "remaining_time": "3:40:03"}
+{"current_steps": 1470, "total_steps": 35625, "loss": 0.6598, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.9795670393143735e-05, "epoch": 0.2063157894736842, "percentage": 4.13, "elapsed_time": "0:09:29", "remaining_time": "3:40:30"}
+{"current_steps": 1480, "total_steps": 35625, "loss": 0.6784, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.9792846247389214e-05, "epoch": 0.20771929824561403, "percentage": 4.15, "elapsed_time": "0:09:32", "remaining_time": "3:40:16"}
+{"current_steps": 1490, "total_steps": 35625, "loss": 0.673, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.979000279955413e-05, "epoch": 0.20912280701754385, "percentage": 4.18, "elapsed_time": "0:09:37", "remaining_time": "3:40:29"}
+{"current_steps": 1500, "total_steps": 35625, "loss": 0.6018, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.97871400518522e-05, "epoch": 0.21052631578947367, "percentage": 4.21, "elapsed_time": "0:09:41", "remaining_time": "3:40:24"}
+{"current_steps": 1510, "total_steps": 35625, "loss": 0.673, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.978425800651216e-05, "epoch": 0.2119298245614035, "percentage": 4.24, "elapsed_time": "0:09:45", "remaining_time": "3:40:20"}
+{"current_steps": 1520, "total_steps": 35625, "loss": 0.6455, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.978135666577779e-05, "epoch": 0.21333333333333335, "percentage": 4.27, "elapsed_time": "0:09:48", "remaining_time": "3:39:58"}
+{"current_steps": 1530, "total_steps": 35625, "loss": 0.6945, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.977843603190788e-05, "epoch": 0.21473684210526317, "percentage": 4.29, "elapsed_time": "0:09:51", "remaining_time": "3:39:39"}
+{"current_steps": 1540, "total_steps": 35625, "loss": 0.6441, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.9775496107176245e-05, "epoch": 0.216140350877193, "percentage": 4.32, "elapsed_time": "0:09:54", "remaining_time": "3:39:19"}
+{"current_steps": 1550, "total_steps": 35625, "loss": 0.6399, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.977253689387172e-05, "epoch": 0.21754385964912282, "percentage": 4.35, "elapsed_time": "0:09:57", "remaining_time": "3:38:58"}
+{"current_steps": 1560, "total_steps": 35625, "loss": 0.7059, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.976955839429815e-05, "epoch": 0.21894736842105264, "percentage": 4.38, "elapsed_time": "0:10:01", "remaining_time": "3:38:53"}
+{"current_steps": 1570, "total_steps": 35625, "loss": 0.7422, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.976656061077441e-05, "epoch": 0.22035087719298246, "percentage": 4.41, "elapsed_time": "0:10:06", "remaining_time": "3:39:11"}
+{"current_steps": 1580, "total_steps": 35625, "loss": 0.7, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.976354354563435e-05, "epoch": 0.2217543859649123, "percentage": 4.44, "elapsed_time": "0:10:10", "remaining_time": "3:39:04"}
+{"current_steps": 1590, "total_steps": 35625, "loss": 0.693, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.976050720122688e-05, "epoch": 0.2231578947368421, "percentage": 4.46, "elapsed_time": "0:10:13", "remaining_time": "3:39:01"}
+{"current_steps": 1600, "total_steps": 35625, "loss": 0.6477, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.97574515799159e-05, "epoch": 0.22456140350877193, "percentage": 4.49, "elapsed_time": "0:10:17", "remaining_time": "3:38:41"}
+{"current_steps": 1610, "total_steps": 35625, "loss": 0.6839, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.975437668408031e-05, "epoch": 0.22596491228070176, "percentage": 4.52, "elapsed_time": "0:10:21", "remaining_time": "3:38:57"}
+{"current_steps": 1620, "total_steps": 35625, "loss": 0.5796, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.9751282516114024e-05, "epoch": 0.22736842105263158, "percentage": 4.55, "elapsed_time": "0:10:25", "remaining_time": "3:38:50"}
+{"current_steps": 1630, "total_steps": 35625, "loss": 0.6967, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.9748169078425955e-05, "epoch": 0.2287719298245614, "percentage": 4.58, "elapsed_time": "0:10:28", "remaining_time": "3:38:33"}
+{"current_steps": 1640, "total_steps": 35625, "loss": 0.5507, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.974503637344002e-05, "epoch": 0.23017543859649123, "percentage": 4.6, "elapsed_time": "0:10:31", "remaining_time": "3:38:12"}
+{"current_steps": 1650, "total_steps": 35625, "loss": 0.6792, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.9741884403595135e-05, "epoch": 0.23157894736842105, "percentage": 4.63, "elapsed_time": "0:10:35", "remaining_time": "3:37:59"}
+{"current_steps": 1660, "total_steps": 35625, "loss": 0.616, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.9738713171345225e-05, "epoch": 0.23298245614035087, "percentage": 4.66, "elapsed_time": "0:10:38", "remaining_time": "3:37:38"}
+{"current_steps": 1670, "total_steps": 35625, "loss": 0.635, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.9735522679159195e-05, "epoch": 0.2343859649122807, "percentage": 4.69, "elapsed_time": "0:10:41", "remaining_time": "3:37:32"}
+{"current_steps": 1680, "total_steps": 35625, "loss": 0.6902, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.9732312929520964e-05, "epoch": 0.23578947368421052, "percentage": 4.72, "elapsed_time": "0:10:45", "remaining_time": "3:37:25"}
+{"current_steps": 1690, "total_steps": 35625, "loss": 0.6899, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.972908392492942e-05, "epoch": 0.23719298245614034, "percentage": 4.74, "elapsed_time": "0:10:48", "remaining_time": "3:37:05"}
+{"current_steps": 1700, "total_steps": 35625, "loss": 0.7013, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.9725835667898455e-05, "epoch": 0.23859649122807017, "percentage": 4.77, "elapsed_time": "0:10:52", "remaining_time": "3:37:10"}
+{"current_steps": 1710, "total_steps": 35625, "loss": 0.6379, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.972256816095695e-05, "epoch": 0.24, "percentage": 4.8, "elapsed_time": "0:10:56", "remaining_time": "3:36:56"}
+{"current_steps": 1720, "total_steps": 35625, "loss": 0.6819, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.971928140664878e-05, "epoch": 0.24140350877192981, "percentage": 4.83, "elapsed_time": "0:10:59", "remaining_time": "3:36:41"}
+{"current_steps": 1730, "total_steps": 35625, "loss": 0.6888, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.971597540753279e-05, "epoch": 0.24280701754385964, "percentage": 4.86, "elapsed_time": "0:11:04", "remaining_time": "3:36:54"}
+{"current_steps": 1740, "total_steps": 35625, "loss": 0.6761, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.971265016618281e-05, "epoch": 0.24421052631578946, "percentage": 4.88, "elapsed_time": "0:11:07", "remaining_time": "3:36:42"}
+{"current_steps": 1750, "total_steps": 35625, "loss": 0.5544, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.970930568518765e-05, "epoch": 0.24561403508771928, "percentage": 4.91, "elapsed_time": "0:11:12", "remaining_time": "3:36:51"}
+{"current_steps": 1760, "total_steps": 35625, "loss": 0.7072, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.97059419671511e-05, "epoch": 0.24701754385964914, "percentage": 4.94, "elapsed_time": "0:11:18", "remaining_time": "3:37:26"}
+{"current_steps": 1770, "total_steps": 35625, "loss": 0.5992, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.9702559014691965e-05, "epoch": 0.24842105263157896, "percentage": 4.97, "elapsed_time": "0:11:22", "remaining_time": "3:37:29"}
+{"current_steps": 1780, "total_steps": 35625, "loss": 0.6277, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.969915683044395e-05, "epoch": 0.24982456140350878, "percentage": 5.0, "elapsed_time": "0:11:26", "remaining_time": "3:37:38"}
+{"current_steps": 1790, "total_steps": 35625, "loss": 0.5501, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.9695735417055776e-05, "epoch": 0.2512280701754386, "percentage": 5.02, "elapsed_time": "0:11:29", "remaining_time": "3:37:19"}
+{"current_steps": 1800, "total_steps": 35625, "loss": 0.7619, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.969229477719116e-05, "epoch": 0.25263157894736843, "percentage": 5.05, "elapsed_time": "0:11:34", "remaining_time": "3:37:27"}
+{"current_steps": 1810, "total_steps": 35625, "loss": 0.6706, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.9688834913528724e-05, "epoch": 0.2540350877192982, "percentage": 5.08, "elapsed_time": "0:11:37", "remaining_time": "3:37:19"}
+{"current_steps": 1820, "total_steps": 35625, "loss": 0.6825, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.9685355828762115e-05, "epoch": 0.2554385964912281, "percentage": 5.11, "elapsed_time": "0:11:41", "remaining_time": "3:37:13"}
+{"current_steps": 1830, "total_steps": 35625, "loss": 0.7195, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.96818575255999e-05, "epoch": 0.25684210526315787, "percentage": 5.14, "elapsed_time": "0:11:44", "remaining_time": "3:36:55"}
+{"current_steps": 1840, "total_steps": 35625, "loss": 0.6196, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.967834000676564e-05, "epoch": 0.2582456140350877, "percentage": 5.16, "elapsed_time": "0:11:48", "remaining_time": "3:36:40"}
+{"current_steps": 1850, "total_steps": 35625, "loss": 0.5768, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.967480327499785e-05, "epoch": 0.2596491228070175, "percentage": 5.19, "elapsed_time": "0:11:52", "remaining_time": "3:36:53"}
+{"current_steps": 1860, "total_steps": 35625, "loss": 0.6484, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.9671247333049975e-05, "epoch": 0.26105263157894737, "percentage": 5.22, "elapsed_time": "0:11:56", "remaining_time": "3:36:44"}
+{"current_steps": 1870, "total_steps": 35625, "loss": 0.6132, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.966767218369046e-05, "epoch": 0.2624561403508772, "percentage": 5.25, "elapsed_time": "0:11:59", "remaining_time": "3:36:33"}
+{"current_steps": 1880, "total_steps": 35625, "loss": 0.6435, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.966407782970267e-05, "epoch": 0.263859649122807, "percentage": 5.28, "elapsed_time": "0:12:02", "remaining_time": "3:36:16"}
+{"current_steps": 1890, "total_steps": 35625, "loss": 0.6581, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.966046427388494e-05, "epoch": 0.26526315789473687, "percentage": 5.31, "elapsed_time": "0:12:06", "remaining_time": "3:35:58"}
+{"current_steps": 1900, "total_steps": 35625, "loss": 0.593, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.965683151905054e-05, "epoch": 0.26666666666666666, "percentage": 5.33, "elapsed_time": "0:12:09", "remaining_time": "3:35:44"}
+{"current_steps": 1910, "total_steps": 35625, "loss": 0.7037, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.965317956802769e-05, "epoch": 0.2680701754385965, "percentage": 5.36, "elapsed_time": "0:12:12", "remaining_time": "3:35:29"}
+{"current_steps": 1920, "total_steps": 35625, "loss": 0.7054, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.964950842365957e-05, "epoch": 0.2694736842105263, "percentage": 5.39, "elapsed_time": "0:12:15", "remaining_time": "3:35:14"}
+{"current_steps": 1930, "total_steps": 35625, "loss": 0.6463, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.9645818088804284e-05, "epoch": 0.27087719298245616, "percentage": 5.42, "elapsed_time": "0:12:21", "remaining_time": "3:35:45"}
+{"current_steps": 1940, "total_steps": 35625, "loss": 0.7222, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.964210856633489e-05, "epoch": 0.27228070175438596, "percentage": 5.45, "elapsed_time": "0:12:26", "remaining_time": "3:36:02"}
+{"current_steps": 1950, "total_steps": 35625, "loss": 0.7659, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.963837985913938e-05, "epoch": 0.2736842105263158, "percentage": 5.47, "elapsed_time": "0:12:29", "remaining_time": "3:35:50"}
+{"current_steps": 1960, "total_steps": 35625, "loss": 0.7388, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.963463197012067e-05, "epoch": 0.2750877192982456, "percentage": 5.5, "elapsed_time": "0:12:32", "remaining_time": "3:35:32"}
+{"current_steps": 1970, "total_steps": 35625, "loss": 0.6961, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.9630864902196626e-05, "epoch": 0.27649122807017545, "percentage": 5.53, "elapsed_time": "0:12:36", "remaining_time": "3:35:32"}
+{"current_steps": 1980, "total_steps": 35625, "loss": 0.5755, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.962707865830004e-05, "epoch": 0.27789473684210525, "percentage": 5.56, "elapsed_time": "0:12:41", "remaining_time": "3:35:33"}
+{"current_steps": 1990, "total_steps": 35625, "loss": 0.6845, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.9623273241378636e-05, "epoch": 0.2792982456140351, "percentage": 5.59, "elapsed_time": "0:12:44", "remaining_time": "3:35:19"}
+{"current_steps": 2000, "total_steps": 35625, "loss": 0.7236, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.9619448654395055e-05, "epoch": 0.2807017543859649, "percentage": 5.61, "elapsed_time": "0:12:49", "remaining_time": "3:35:33"}
+{"current_steps": 2000, "total_steps": 35625, "loss": null, "eval_loss": 0.6709622740745544, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": null, "epoch": 0.2807017543859649, "percentage": 5.61, "elapsed_time": "0:12:49", "remaining_time": "3:35:33"}
+{"current_steps": 2010, "total_steps": 35625, "loss": 0.5944, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.9615604900326875e-05, "epoch": 0.28210526315789475, "percentage": 5.64, "elapsed_time": "0:13:36", "remaining_time": "3:47:41"}
+{"current_steps": 2020, "total_steps": 35625, "loss": 0.6174, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.961174198216658e-05, "epoch": 0.28350877192982454, "percentage": 5.67, "elapsed_time": "0:13:40", "remaining_time": "3:47:36"}
+{"current_steps": 2030, "total_steps": 35625, "loss": 0.6801, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.9607859902921595e-05, "epoch": 0.2849122807017544, "percentage": 5.7, "elapsed_time": "0:13:44", "remaining_time": "3:47:24"}
+{"current_steps": 2040, "total_steps": 35625, "loss": 0.6657, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.960395866561425e-05, "epoch": 0.2863157894736842, "percentage": 5.73, "elapsed_time": "0:13:49", "remaining_time": "3:47:31"}
+{"current_steps": 2050, "total_steps": 35625, "loss": 0.6536, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.960003827328179e-05, "epoch": 0.28771929824561404, "percentage": 5.75, "elapsed_time": "0:13:52", "remaining_time": "3:47:16"}
+{"current_steps": 2060, "total_steps": 35625, "loss": 0.6361, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.959609872897637e-05, "epoch": 0.28912280701754384, "percentage": 5.78, "elapsed_time": "0:13:56", "remaining_time": "3:47:14"}
+{"current_steps": 2070, "total_steps": 35625, "loss": 0.6369, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.959214003576507e-05, "epoch": 0.2905263157894737, "percentage": 5.81, "elapsed_time": "0:14:00", "remaining_time": "3:47:05"}
+{"current_steps": 2080, "total_steps": 35625, "loss": 0.7563, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.958816219672986e-05, "epoch": 0.2919298245614035, "percentage": 5.84, "elapsed_time": "0:14:04", "remaining_time": "3:46:58"}
+{"current_steps": 2090, "total_steps": 35625, "loss": 0.6575, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.9584165214967634e-05, "epoch": 0.29333333333333333, "percentage": 5.87, "elapsed_time": "0:14:07", "remaining_time": "3:46:37"}
+{"current_steps": 2100, "total_steps": 35625, "loss": 0.6611, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.9580149093590165e-05, "epoch": 0.29473684210526313, "percentage": 5.89, "elapsed_time": "0:14:11", "remaining_time": "3:46:29"}
+{"current_steps": 2110, "total_steps": 35625, "loss": 0.6456, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.957611383572415e-05, "epoch": 0.296140350877193, "percentage": 5.92, "elapsed_time": "0:14:14", "remaining_time": "3:46:15"}
+{"current_steps": 2120, "total_steps": 35625, "loss": 0.7114, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.9572059444511175e-05, "epoch": 0.29754385964912283, "percentage": 5.95, "elapsed_time": "0:14:18", "remaining_time": "3:46:11"}
+{"current_steps": 2130, "total_steps": 35625, "loss": 0.6338, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.956798592310773e-05, "epoch": 0.29894736842105263, "percentage": 5.98, "elapsed_time": "0:14:22", "remaining_time": "3:45:56"}
+{"current_steps": 2140, "total_steps": 35625, "loss": 0.6323, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.956389327468518e-05, "epoch": 0.3003508771929825, "percentage": 6.01, "elapsed_time": "0:14:26", "remaining_time": "3:46:01"}
+{"current_steps": 2150, "total_steps": 35625, "loss": 0.6231, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.9559781502429784e-05, "epoch": 0.3017543859649123, "percentage": 6.04, "elapsed_time": "0:14:30", "remaining_time": "3:45:55"}
+{"current_steps": 2160, "total_steps": 35625, "loss": 0.6515, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.955565060954272e-05, "epoch": 0.3031578947368421, "percentage": 6.06, "elapsed_time": "0:14:34", "remaining_time": "3:45:46"}
+{"current_steps": 2170, "total_steps": 35625, "loss": 0.6023, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.9551500599240006e-05, "epoch": 0.3045614035087719, "percentage": 6.09, "elapsed_time": "0:14:38", "remaining_time": "3:45:44"}
+{"current_steps": 2180, "total_steps": 35625, "loss": 0.678, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.954733147475259e-05, "epoch": 0.3059649122807018, "percentage": 6.12, "elapsed_time": "0:14:41", "remaining_time": "3:45:27"}
+{"current_steps": 2190, "total_steps": 35625, "loss": 0.6307, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.954314323932627e-05, "epoch": 0.30736842105263157, "percentage": 6.15, "elapsed_time": "0:14:46", "remaining_time": "3:45:28"}
+{"current_steps": 2200, "total_steps": 35625, "loss": 0.6543, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.953893589622172e-05, "epoch": 0.3087719298245614, "percentage": 6.18, "elapsed_time": "0:14:50", "remaining_time": "3:45:22"}
+{"current_steps": 2210, "total_steps": 35625, "loss": 0.7493, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.9534709448714514e-05, "epoch": 0.3101754385964912, "percentage": 6.2, "elapsed_time": "0:14:53", "remaining_time": "3:45:08"}
+{"current_steps": 2220, "total_steps": 35625, "loss": 0.6856, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.9530463900095084e-05, "epoch": 0.31157894736842107, "percentage": 6.23, "elapsed_time": "0:14:57", "remaining_time": "3:45:02"}
+{"current_steps": 2230, "total_steps": 35625, "loss": 0.5721, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.952619925366873e-05, "epoch": 0.31298245614035086, "percentage": 6.26, "elapsed_time": "0:15:00", "remaining_time": "3:44:46"}
+{"current_steps": 2240, "total_steps": 35625, "loss": 0.7126, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.9521915512755635e-05, "epoch": 0.3143859649122807, "percentage": 6.29, "elapsed_time": "0:15:03", "remaining_time": "3:44:32"}
+{"current_steps": 2250, "total_steps": 35625, "loss": 0.7522, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.951761268069082e-05, "epoch": 0.3157894736842105, "percentage": 6.32, "elapsed_time": "0:15:07", "remaining_time": "3:44:26"}
+{"current_steps": 2260, "total_steps": 35625, "loss": 0.6825, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.95132907608242e-05, "epoch": 0.31719298245614036, "percentage": 6.34, "elapsed_time": "0:15:13", "remaining_time": "3:44:44"}
+{"current_steps": 2270, "total_steps": 35625, "loss": 0.7592, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.950894975652055e-05, "epoch": 0.31859649122807016, "percentage": 6.37, "elapsed_time": "0:15:16", "remaining_time": "3:44:28"}
+{"current_steps": 2280, "total_steps": 35625, "loss": 0.5719, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.950458967115946e-05, "epoch": 0.32, "percentage": 6.4, "elapsed_time": "0:15:21", "remaining_time": "3:44:42"}
+{"current_steps": 2290, "total_steps": 35625, "loss": 0.6288, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.9500210508135436e-05, "epoch": 0.3214035087719298, "percentage": 6.43, "elapsed_time": "0:15:25", "remaining_time": "3:44:34"}
+{"current_steps": 2300, "total_steps": 35625, "loss": 0.7081, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.9495812270857786e-05, "epoch": 0.32280701754385965, "percentage": 6.46, "elapsed_time": "0:15:30", "remaining_time": "3:44:35"}
+{"current_steps": 2310, "total_steps": 35625, "loss": 0.6371, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.94913949627507e-05, "epoch": 0.32421052631578945, "percentage": 6.48, "elapsed_time": "0:15:33", "remaining_time": "3:44:18"}
+{"current_steps": 2320, "total_steps": 35625, "loss": 0.6712, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.9486958587253195e-05, "epoch": 0.3256140350877193, "percentage": 6.51, "elapsed_time": "0:15:36", "remaining_time": "3:44:00"}
+{"current_steps": 2330, "total_steps": 35625, "loss": 0.6232, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.9482503147819156e-05, "epoch": 0.3270175438596491, "percentage": 6.54, "elapsed_time": "0:15:40", "remaining_time": "3:43:52"}
+{"current_steps": 2340, "total_steps": 35625, "loss": 0.6519, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.947802864791727e-05, "epoch": 0.32842105263157895, "percentage": 6.57, "elapsed_time": "0:15:44", "remaining_time": "3:43:48"}
+{"current_steps": 2350, "total_steps": 35625, "loss": 0.7172, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.947353509103112e-05, "epoch": 0.3298245614035088, "percentage": 6.6, "elapsed_time": "0:15:47", "remaining_time": "3:43:39"}
+{"current_steps": 2360, "total_steps": 35625, "loss": 0.6784, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.946902248065907e-05, "epoch": 0.3312280701754386, "percentage": 6.62, "elapsed_time": "0:15:51", "remaining_time": "3:43:37"}
+{"current_steps": 2370, "total_steps": 35625, "loss": 0.612, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.946449082031435e-05, "epoch": 0.33263157894736844, "percentage": 6.65, "elapsed_time": "0:15:55", "remaining_time": "3:43:33"}
+{"current_steps": 2380, "total_steps": 35625, "loss": 0.7573, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.9459940113525014e-05, "epoch": 0.33403508771929824, "percentage": 6.68, "elapsed_time": "0:15:59", "remaining_time": "3:43:18"}
+{"current_steps": 2390, "total_steps": 35625, "loss": 0.7167, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.945537036383394e-05, "epoch": 0.3354385964912281, "percentage": 6.71, "elapsed_time": "0:16:02", "remaining_time": "3:43:04"}
+{"current_steps": 2400, "total_steps": 35625, "loss": 0.5797, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.945078157479884e-05, "epoch": 0.3368421052631579, "percentage": 6.74, "elapsed_time": "0:16:05", "remaining_time": "3:42:47"}
+{"current_steps": 2410, "total_steps": 35625, "loss": 0.603, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.944617374999224e-05, "epoch": 0.33824561403508774, "percentage": 6.76, "elapsed_time": "0:16:08", "remaining_time": "3:42:32"}
+{"current_steps": 2420, "total_steps": 35625, "loss": 0.6496, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.944154689300148e-05, "epoch": 0.33964912280701753, "percentage": 6.79, "elapsed_time": "0:16:13", "remaining_time": "3:42:36"}
+{"current_steps": 2430, "total_steps": 35625, "loss": 0.6945, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.943690100742875e-05, "epoch": 0.3410526315789474, "percentage": 6.82, "elapsed_time": "0:16:16", "remaining_time": "3:42:19"}
+{"current_steps": 2440, "total_steps": 35625, "loss": 0.6489, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.943223609689101e-05, "epoch": 0.3424561403508772, "percentage": 6.85, "elapsed_time": "0:16:19", "remaining_time": "3:42:02"}
+{"current_steps": 2450, "total_steps": 35625, "loss": 0.6066, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.9427552165020066e-05, "epoch": 0.34385964912280703, "percentage": 6.88, "elapsed_time": "0:16:23", "remaining_time": "3:41:58"}
+{"current_steps": 2460, "total_steps": 35625, "loss": 0.6208, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.9422849215462506e-05, "epoch": 0.3452631578947368, "percentage": 6.91, "elapsed_time": "0:16:27", "remaining_time": "3:41:56"}
+{"current_steps": 2470, "total_steps": 35625, "loss": 0.7249, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.9418127251879756e-05, "epoch": 0.3466666666666667, "percentage": 6.93, "elapsed_time": "0:16:33", "remaining_time": "3:42:09"}
+{"current_steps": 2480, "total_steps": 35625, "loss": 0.7049, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.9413386277948006e-05, "epoch": 0.3480701754385965, "percentage": 6.96, "elapsed_time": "0:16:36", "remaining_time": "3:41:57"}
+{"current_steps": 2490, "total_steps": 35625, "loss": 0.7138, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.9408626297358286e-05, "epoch": 0.3494736842105263, "percentage": 6.99, "elapsed_time": "0:16:41", "remaining_time": "3:42:02"}
+{"current_steps": 2500, "total_steps": 35625, "loss": 0.6047, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.940384731381639e-05, "epoch": 0.3508771929824561, "percentage": 7.02, "elapsed_time": "0:16:44", "remaining_time": "3:41:45"}
+{"current_steps": 2510, "total_steps": 35625, "loss": 0.6098, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.9399049331042925e-05, "epoch": 0.35228070175438597, "percentage": 7.05, "elapsed_time": "0:16:48", "remaining_time": "3:41:50"}
+{"current_steps": 2520, "total_steps": 35625, "loss": 0.6862, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.939423235277328e-05, "epoch": 0.35368421052631577, "percentage": 7.07, "elapsed_time": "0:16:52", "remaining_time": "3:41:35"}
+{"current_steps": 2530, "total_steps": 35625, "loss": 0.7044, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.938939638275765e-05, "epoch": 0.3550877192982456, "percentage": 7.1, "elapsed_time": "0:16:55", "remaining_time": "3:41:25"}
+{"current_steps": 2540, "total_steps": 35625, "loss": 0.6377, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.938454142476099e-05, "epoch": 0.3564912280701754, "percentage": 7.13, "elapsed_time": "0:17:00", "remaining_time": "3:41:28"}
+{"current_steps": 2550, "total_steps": 35625, "loss": 0.7172, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.9379667482563066e-05, "epoch": 0.35789473684210527, "percentage": 7.16, "elapsed_time": "0:17:05", "remaining_time": "3:41:46"}
+{"current_steps": 2560, "total_steps": 35625, "loss": 0.5563, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.937477455995839e-05, "epoch": 0.35929824561403506, "percentage": 7.19, "elapsed_time": "0:17:09", "remaining_time": "3:41:36"}
+{"current_steps": 2570, "total_steps": 35625, "loss": 0.7217, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.9369862660756286e-05, "epoch": 0.3607017543859649, "percentage": 7.21, "elapsed_time": "0:17:13", "remaining_time": "3:41:30"}
+{"current_steps": 2580, "total_steps": 35625, "loss": 0.6424, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.9364931788780835e-05, "epoch": 0.36210526315789476, "percentage": 7.24, "elapsed_time": "0:17:20", "remaining_time": "3:42:00"}
+{"current_steps": 2590, "total_steps": 35625, "loss": 0.6449, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.9359981947870874e-05, "epoch": 0.36350877192982456, "percentage": 7.27, "elapsed_time": "0:17:23", "remaining_time": "3:41:45"}
+{"current_steps": 2600, "total_steps": 35625, "loss": 0.7963, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.9355013141880045e-05, "epoch": 0.3649122807017544, "percentage": 7.3, "elapsed_time": "0:17:26", "remaining_time": "3:41:33"}
+{"current_steps": 2610, "total_steps": 35625, "loss": 0.716, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.9350025374676725e-05, "epoch": 0.3663157894736842, "percentage": 7.33, "elapsed_time": "0:17:30", "remaining_time": "3:41:25"}
+{"current_steps": 2620, "total_steps": 35625, "loss": 0.5228, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.934501865014405e-05, "epoch": 0.36771929824561406, "percentage": 7.35, "elapsed_time": "0:17:34", "remaining_time": "3:41:20"}
+{"current_steps": 2630, "total_steps": 35625, "loss": 0.6206, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.933999297217994e-05, "epoch": 0.36912280701754385, "percentage": 7.38, "elapsed_time": "0:17:37", "remaining_time": "3:41:04"}
+{"current_steps": 2640, "total_steps": 35625, "loss": 0.6324, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.933494834469706e-05, "epoch": 0.3705263157894737, "percentage": 7.41, "elapsed_time": "0:17:40", "remaining_time": "3:40:51"}
+{"current_steps": 2650, "total_steps": 35625, "loss": 0.7658, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.9329884771622817e-05, "epoch": 0.3719298245614035, "percentage": 7.44, "elapsed_time": "0:17:44", "remaining_time": "3:40:49"}
+{"current_steps": 2660, "total_steps": 35625, "loss": 0.6711, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.9324802256899385e-05, "epoch": 0.37333333333333335, "percentage": 7.47, "elapsed_time": "0:17:48", "remaining_time": "3:40:45"}
+{"current_steps": 2670, "total_steps": 35625, "loss": 0.5751, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.931970080448366e-05, "epoch": 0.37473684210526315, "percentage": 7.49, "elapsed_time": "0:17:53", "remaining_time": "3:40:48"}
+{"current_steps": 2680, "total_steps": 35625, "loss": 0.6772, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.931458041834731e-05, "epoch": 0.376140350877193, "percentage": 7.52, "elapsed_time": "0:17:57", "remaining_time": "3:40:48"}
+{"current_steps": 2690, "total_steps": 35625, "loss": 0.6141, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.9309441102476734e-05, "epoch": 0.3775438596491228, "percentage": 7.55, "elapsed_time": "0:18:01", "remaining_time": "3:40:37"}
+{"current_steps": 2700, "total_steps": 35625, "loss": 0.627, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.930428286087306e-05, "epoch": 0.37894736842105264, "percentage": 7.58, "elapsed_time": "0:18:04", "remaining_time": "3:40:28"}
+{"current_steps": 2710, "total_steps": 35625, "loss": 0.6991, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.929910569755215e-05, "epoch": 0.38035087719298244, "percentage": 7.61, "elapsed_time": "0:18:09", "remaining_time": "3:40:37"}
+{"current_steps": 2720, "total_steps": 35625, "loss": 0.6379, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.929390961654462e-05, "epoch": 0.3817543859649123, "percentage": 7.64, "elapsed_time": "0:18:13", "remaining_time": "3:40:22"}
+{"current_steps": 2730, "total_steps": 35625, "loss": 0.673, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.9288694621895776e-05, "epoch": 0.3831578947368421, "percentage": 7.66, "elapsed_time": "0:18:17", "remaining_time": "3:40:20"}
+{"current_steps": 2740, "total_steps": 35625, "loss": 0.6562, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.928346071766569e-05, "epoch": 0.38456140350877194, "percentage": 7.69, "elapsed_time": "0:18:21", "remaining_time": "3:40:22"}
+{"current_steps": 2750, "total_steps": 35625, "loss": 0.6517, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.927820790792912e-05, "epoch": 0.38596491228070173, "percentage": 7.72, "elapsed_time": "0:18:24", "remaining_time": "3:40:07"}
+{"current_steps": 2760, "total_steps": 35625, "loss": 0.6506, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.9272936196775565e-05, "epoch": 0.3873684210526316, "percentage": 7.75, "elapsed_time": "0:18:28", "remaining_time": "3:40:01"}
+{"current_steps": 2770, "total_steps": 35625, "loss": 0.7087, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.926764558830923e-05, "epoch": 0.3887719298245614, "percentage": 7.78, "elapsed_time": "0:18:34", "remaining_time": "3:40:14"}
+{"current_steps": 2780, "total_steps": 35625, "loss": 0.6409, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.926233608664904e-05, "epoch": 0.39017543859649123, "percentage": 7.8, "elapsed_time": "0:18:37", "remaining_time": "3:40:05"}
+{"current_steps": 2790, "total_steps": 35625, "loss": 0.7131, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.9257007695928624e-05, "epoch": 0.391578947368421, "percentage": 7.83, "elapsed_time": "0:18:41", "remaining_time": "3:39:53"}
+{"current_steps": 2800, "total_steps": 35625, "loss": 0.7967, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.925166042029631e-05, "epoch": 0.3929824561403509, "percentage": 7.86, "elapsed_time": "0:18:45", "remaining_time": "3:39:57"}
+{"current_steps": 2810, "total_steps": 35625, "loss": 0.6273, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.924629426391515e-05, "epoch": 0.39438596491228073, "percentage": 7.89, "elapsed_time": "0:18:50", "remaining_time": "3:39:58"}
+{"current_steps": 2820, "total_steps": 35625, "loss": 0.6419, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.924090923096286e-05, "epoch": 0.3957894736842105, "percentage": 7.92, "elapsed_time": "0:18:54", "remaining_time": "3:39:54"}
+{"current_steps": 2830, "total_steps": 35625, "loss": 0.6296, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.923550532563189e-05, "epoch": 0.3971929824561404, "percentage": 7.94, "elapsed_time": "0:18:57", "remaining_time": "3:39:47"}
+{"current_steps": 2840, "total_steps": 35625, "loss": 0.6323, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.923008255212935e-05, "epoch": 0.39859649122807017, "percentage": 7.97, "elapsed_time": "0:19:01", "remaining_time": "3:39:41"}
+{"current_steps": 2850, "total_steps": 35625, "loss": 0.6613, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.922464091467707e-05, "epoch": 0.4, "percentage": 8.0, "elapsed_time": "0:19:05", "remaining_time": "3:39:38"}
+{"current_steps": 2860, "total_steps": 35625, "loss": 0.6119, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.921918041751155e-05, "epoch": 0.4014035087719298, "percentage": 8.03, "elapsed_time": "0:19:09", "remaining_time": "3:39:26"}
+{"current_steps": 2870, "total_steps": 35625, "loss": 0.6575, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.9213701064883966e-05, "epoch": 0.40280701754385967, "percentage": 8.06, "elapsed_time": "0:19:12", "remaining_time": "3:39:10"}
+{"current_steps": 2880, "total_steps": 35625, "loss": 0.7369, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.9208202861060185e-05, "epoch": 0.40421052631578946, "percentage": 8.08, "elapsed_time": "0:19:16", "remaining_time": "3:39:10"}
+{"current_steps": 2890, "total_steps": 35625, "loss": 0.6551, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.920268581032074e-05, "epoch": 0.4056140350877193, "percentage": 8.11, "elapsed_time": "0:19:21", "remaining_time": "3:39:13"}
+{"current_steps": 2900, "total_steps": 35625, "loss": 0.6584, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.919714991696086e-05, "epoch": 0.4070175438596491, "percentage": 8.14, "elapsed_time": "0:19:27", "remaining_time": "3:39:29"}
+{"current_steps": 2910, "total_steps": 35625, "loss": 0.6674, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.9191595185290414e-05, "epoch": 0.40842105263157896, "percentage": 8.17, "elapsed_time": "0:19:31", "remaining_time": "3:39:26"}
+{"current_steps": 2920, "total_steps": 35625, "loss": 0.7091, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.918602161963396e-05, "epoch": 0.40982456140350876, "percentage": 8.2, "elapsed_time": "0:19:36", "remaining_time": "3:39:36"}
+{"current_steps": 2930, "total_steps": 35625, "loss": 0.5862, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.9180429224330706e-05, "epoch": 0.4112280701754386, "percentage": 8.22, "elapsed_time": "0:19:40", "remaining_time": "3:39:30"}
+{"current_steps": 2940, "total_steps": 35625, "loss": 0.731, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.917481800373451e-05, "epoch": 0.4126315789473684, "percentage": 8.25, "elapsed_time": "0:19:45", "remaining_time": "3:39:38"}
+{"current_steps": 2950, "total_steps": 35625, "loss": 0.6716, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.916918796221393e-05, "epoch": 0.41403508771929826, "percentage": 8.28, "elapsed_time": "0:19:50", "remaining_time": "3:39:44"}
+{"current_steps": 2960, "total_steps": 35625, "loss": 0.6427, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.9163539104152124e-05, "epoch": 0.41543859649122805, "percentage": 8.31, "elapsed_time": "0:19:54", "remaining_time": "3:39:46"}
+{"current_steps": 2970, "total_steps": 35625, "loss": 0.6184, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.9157871433946925e-05, "epoch": 0.4168421052631579, "percentage": 8.34, "elapsed_time": "0:19:58", "remaining_time": "3:39:33"}
+{"current_steps": 2980, "total_steps": 35625, "loss": 0.6208, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.9152184956010813e-05, "epoch": 0.4182456140350877, "percentage": 8.36, "elapsed_time": "0:20:02", "remaining_time": "3:39:36"}
+{"current_steps": 2990, "total_steps": 35625, "loss": 0.6517, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.91464796747709e-05, "epoch": 0.41964912280701755, "percentage": 8.39, "elapsed_time": "0:20:06", "remaining_time": "3:39:26"}
+{"current_steps": 3000, "total_steps": 35625, "loss": 0.6656, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.914075559466895e-05, "epoch": 0.42105263157894735, "percentage": 8.42, "elapsed_time": "0:20:10", "remaining_time": "3:39:20"}
+{"current_steps": 3010, "total_steps": 35625, "loss": 0.6605, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.913501272016135e-05, "epoch": 0.4224561403508772, "percentage": 8.45, "elapsed_time": "0:20:14", "remaining_time": "3:39:22"}
+{"current_steps": 3020, "total_steps": 35625, "loss": 0.6348, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.9129251055719125e-05, "epoch": 0.423859649122807, "percentage": 8.48, "elapsed_time": "0:20:18", "remaining_time": "3:39:10"}
+{"current_steps": 3030, "total_steps": 35625, "loss": 0.6735, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.912347060582793e-05, "epoch": 0.42526315789473684, "percentage": 8.51, "elapsed_time": "0:20:21", "remaining_time": "3:38:58"}
+{"current_steps": 3040, "total_steps": 35625, "loss": 0.604, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.911767137498805e-05, "epoch": 0.4266666666666667, "percentage": 8.53, "elapsed_time": "0:20:25", "remaining_time": "3:39:00"}
+{"current_steps": 3050, "total_steps": 35625, "loss": 0.659, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.911185336771437e-05, "epoch": 0.4280701754385965, "percentage": 8.56, "elapsed_time": "0:20:29", "remaining_time": "3:38:47"}
+{"current_steps": 3060, "total_steps": 35625, "loss": 0.6545, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.910601658853642e-05, "epoch": 0.42947368421052634, "percentage": 8.59, "elapsed_time": "0:20:32", "remaining_time": "3:38:34"}
+{"current_steps": 3070, "total_steps": 35625, "loss": 0.6057, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.910016104199833e-05, "epoch": 0.43087719298245614, "percentage": 8.62, "elapsed_time": "0:20:37", "remaining_time": "3:38:39"}
+{"current_steps": 3080, "total_steps": 35625, "loss": 0.5503, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.909428673265884e-05, "epoch": 0.432280701754386, "percentage": 8.65, "elapsed_time": "0:20:40", "remaining_time": "3:38:32"}
+{"current_steps": 3090, "total_steps": 35625, "loss": 0.6534, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.90883936650913e-05, "epoch": 0.4336842105263158, "percentage": 8.67, "elapsed_time": "0:20:45", "remaining_time": "3:38:28"}
+{"current_steps": 3100, "total_steps": 35625, "loss": 0.6696, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.908248184388367e-05, "epoch": 0.43508771929824563, "percentage": 8.7, "elapsed_time": "0:20:48", "remaining_time": "3:38:19"}
+{"current_steps": 3110, "total_steps": 35625, "loss": 0.5936, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.90765512736385e-05, "epoch": 0.43649122807017543, "percentage": 8.73, "elapsed_time": "0:20:52", "remaining_time": "3:38:11"}
+{"current_steps": 3120, "total_steps": 35625, "loss": 0.6154, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.907060195897296e-05, "epoch": 0.4378947368421053, "percentage": 8.76, "elapsed_time": "0:20:56", "remaining_time": "3:38:08"}
+{"current_steps": 3130, "total_steps": 35625, "loss": 0.7975, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.906463390451878e-05, "epoch": 0.4392982456140351, "percentage": 8.79, "elapsed_time": "0:21:00", "remaining_time": "3:38:06"}
+{"current_steps": 3140, "total_steps": 35625, "loss": 0.6742, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.9058647114922286e-05, "epoch": 0.44070175438596493, "percentage": 8.81, "elapsed_time": "0:21:03", "remaining_time": "3:37:56"}
+{"current_steps": 3150, "total_steps": 35625, "loss": 0.6221, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.9052641594844416e-05, "epoch": 0.4421052631578947, "percentage": 8.84, "elapsed_time": "0:21:09", "remaining_time": "3:38:03"}
+{"current_steps": 3160, "total_steps": 35625, "loss": 0.7789, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.9046617348960666e-05, "epoch": 0.4435087719298246, "percentage": 8.87, "elapsed_time": "0:21:13", "remaining_time": "3:38:02"}
+{"current_steps": 3170, "total_steps": 35625, "loss": 0.6668, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.904057438196111e-05, "epoch": 0.44491228070175437, "percentage": 8.9, "elapsed_time": "0:21:17", "remaining_time": "3:38:01"}
+{"current_steps": 3180, "total_steps": 35625, "loss": 0.6732, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.903451269855043e-05, "epoch": 0.4463157894736842, "percentage": 8.93, "elapsed_time": "0:21:21", "remaining_time": "3:37:56"}
+{"current_steps": 3190, "total_steps": 35625, "loss": 0.652, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.9028432303447826e-05, "epoch": 0.447719298245614, "percentage": 8.95, "elapsed_time": "0:21:26", "remaining_time": "3:38:00"}
+{"current_steps": 3200, "total_steps": 35625, "loss": 0.7734, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.902233320138711e-05, "epoch": 0.44912280701754387, "percentage": 8.98, "elapsed_time": "0:21:31", "remaining_time": "3:38:03"}
+{"current_steps": 3210, "total_steps": 35625, "loss": 0.6524, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.901621539711664e-05, "epoch": 0.45052631578947366, "percentage": 9.01, "elapsed_time": "0:21:35", "remaining_time": "3:37:57"}
+{"current_steps": 3220, "total_steps": 35625, "loss": 0.5631, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.901007889539933e-05, "epoch": 0.4519298245614035, "percentage": 9.04, "elapsed_time": "0:21:39", "remaining_time": "3:37:56"}
+{"current_steps": 3230, "total_steps": 35625, "loss": 0.6949, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.900392370101266e-05, "epoch": 0.4533333333333333, "percentage": 9.07, "elapsed_time": "0:21:43", "remaining_time": "3:37:51"}
+{"current_steps": 3240, "total_steps": 35625, "loss": 0.7445, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.899774981874867e-05, "epoch": 0.45473684210526316, "percentage": 9.09, "elapsed_time": "0:21:46", "remaining_time": "3:37:41"}
+{"current_steps": 3250, "total_steps": 35625, "loss": 0.6329, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.8991557253413924e-05, "epoch": 0.45614035087719296, "percentage": 9.12, "elapsed_time": "0:21:49", "remaining_time": "3:37:29"}
+{"current_steps": 3260, "total_steps": 35625, "loss": 0.6808, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.8985346009829546e-05, "epoch": 0.4575438596491228, "percentage": 9.15, "elapsed_time": "0:21:53", "remaining_time": "3:37:16"}
+{"current_steps": 3270, "total_steps": 35625, "loss": 0.6464, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.8979116092831223e-05, "epoch": 0.4589473684210526, "percentage": 9.18, "elapsed_time": "0:21:58", "remaining_time": "3:37:21"}
+{"current_steps": 3280, "total_steps": 35625, "loss": 0.6997, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.897286750726913e-05, "epoch": 0.46035087719298246, "percentage": 9.21, "elapsed_time": "0:22:01", "remaining_time": "3:37:08"}
+{"current_steps": 3290, "total_steps": 35625, "loss": 0.642, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.8966600258008024e-05, "epoch": 0.4617543859649123, "percentage": 9.24, "elapsed_time": "0:22:05", "remaining_time": "3:37:02"}
+{"current_steps": 3300, "total_steps": 35625, "loss": 0.651, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.896031434992717e-05, "epoch": 0.4631578947368421, "percentage": 9.26, "elapsed_time": "0:22:08", "remaining_time": "3:36:56"}
+{"current_steps": 3310, "total_steps": 35625, "loss": 0.6899, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.8954009787920365e-05, "epoch": 0.46456140350877195, "percentage": 9.29, "elapsed_time": "0:22:14", "remaining_time": "3:37:08"}
+{"current_steps": 3320, "total_steps": 35625, "loss": 0.7559, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.894768657689592e-05, "epoch": 0.46596491228070175, "percentage": 9.32, "elapsed_time": "0:22:18", "remaining_time": "3:37:00"}
+{"current_steps": 3330, "total_steps": 35625, "loss": 0.6473, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.8941344721776675e-05, "epoch": 0.4673684210526316, "percentage": 9.35, "elapsed_time": "0:22:22", "remaining_time": "3:36:59"}
+{"current_steps": 3340, "total_steps": 35625, "loss": 0.726, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.893498422749997e-05, "epoch": 0.4687719298245614, "percentage": 9.38, "elapsed_time": "0:22:27", "remaining_time": "3:37:06"}
+{"current_steps": 3350, "total_steps": 35625, "loss": 0.6, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.8928605099017696e-05, "epoch": 0.47017543859649125, "percentage": 9.4, "elapsed_time": "0:22:31", "remaining_time": "3:36:57"}
+{"current_steps": 3360, "total_steps": 35625, "loss": 0.6202, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.89222073412962e-05, "epoch": 0.47157894736842104, "percentage": 9.43, "elapsed_time": "0:22:36", "remaining_time": "3:37:01"}
+{"current_steps": 3370, "total_steps": 35625, "loss": 0.6717, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.8915790959316356e-05, "epoch": 0.4729824561403509, "percentage": 9.46, "elapsed_time": "0:22:38", "remaining_time": "3:36:46"}
+{"current_steps": 3380, "total_steps": 35625, "loss": 0.6328, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.890935595807355e-05, "epoch": 0.4743859649122807, "percentage": 9.49, "elapsed_time": "0:22:41", "remaining_time": "3:36:32"}
+{"current_steps": 3390, "total_steps": 35625, "loss": 0.7271, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.890290234257764e-05, "epoch": 0.47578947368421054, "percentage": 9.52, "elapsed_time": "0:22:44", "remaining_time": "3:36:18"}
+{"current_steps": 3400, "total_steps": 35625, "loss": 0.582, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.889643011785299e-05, "epoch": 0.47719298245614034, "percentage": 9.54, "elapsed_time": "0:22:48", "remaining_time": "3:36:12"}
+{"current_steps": 3410, "total_steps": 35625, "loss": 0.6851, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.888993928893846e-05, "epoch": 0.4785964912280702, "percentage": 9.57, "elapsed_time": "0:22:51", "remaining_time": "3:36:00"}
+{"current_steps": 3420, "total_steps": 35625, "loss": 0.583, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.888342986088736e-05, "epoch": 0.48, "percentage": 9.6, "elapsed_time": "0:22:56", "remaining_time": "3:35:59"}
+{"current_steps": 3430, "total_steps": 35625, "loss": 0.6261, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.887690183876752e-05, "epoch": 0.48140350877192983, "percentage": 9.63, "elapsed_time": "0:22:59", "remaining_time": "3:35:46"}
+{"current_steps": 3440, "total_steps": 35625, "loss": 0.6563, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.887035522766122e-05, "epoch": 0.48280701754385963, "percentage": 9.66, "elapsed_time": "0:23:03", "remaining_time": "3:35:42"}
+{"current_steps": 3450, "total_steps": 35625, "loss": 0.5178, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.886379003266523e-05, "epoch": 0.4842105263157895, "percentage": 9.68, "elapsed_time": "0:23:06", "remaining_time": "3:35:28"}
+{"current_steps": 3460, "total_steps": 35625, "loss": 0.824, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.885720625889078e-05, "epoch": 0.4856140350877193, "percentage": 9.71, "elapsed_time": "0:23:09", "remaining_time": "3:35:16"}
+{"current_steps": 3470, "total_steps": 35625, "loss": 0.6821, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.8850603911463556e-05, "epoch": 0.4870175438596491, "percentage": 9.74, "elapsed_time": "0:23:13", "remaining_time": "3:35:10"}
+{"current_steps": 3480, "total_steps": 35625, "loss": 0.6955, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.8843982995523704e-05, "epoch": 0.4884210526315789, "percentage": 9.77, "elapsed_time": "0:23:17", "remaining_time": "3:35:11"}
+{"current_steps": 3490, "total_steps": 35625, "loss": 0.6447, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.883734351622586e-05, "epoch": 0.4898245614035088, "percentage": 9.8, "elapsed_time": "0:23:21", "remaining_time": "3:35:08"}
+{"current_steps": 3500, "total_steps": 35625, "loss": 0.6454, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.8830685478739057e-05, "epoch": 0.49122807017543857, "percentage": 9.82, "elapsed_time": "0:23:26", "remaining_time": "3:35:10"}
+{"current_steps": 3510, "total_steps": 35625, "loss": 0.6676, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.8824008888246834e-05, "epoch": 0.4926315789473684, "percentage": 9.85, "elapsed_time": "0:23:30", "remaining_time": "3:35:05"}
+{"current_steps": 3520, "total_steps": 35625, "loss": 0.5715, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.8817313749947115e-05, "epoch": 0.49403508771929827, "percentage": 9.88, "elapsed_time": "0:23:33", "remaining_time": "3:34:53"}
+{"current_steps": 3530, "total_steps": 35625, "loss": 0.6479, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.881060006905232e-05, "epoch": 0.49543859649122807, "percentage": 9.91, "elapsed_time": "0:23:36", "remaining_time": "3:34:40"}
+{"current_steps": 3540, "total_steps": 35625, "loss": 0.6208, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.880386785078925e-05, "epoch": 0.4968421052631579, "percentage": 9.94, "elapsed_time": "0:23:40", "remaining_time": "3:34:34"}
+{"current_steps": 3550, "total_steps": 35625, "loss": 0.6503, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.87971171003992e-05, "epoch": 0.4982456140350877, "percentage": 9.96, "elapsed_time": "0:23:43", "remaining_time": "3:34:22"}
+{"current_steps": 3560, "total_steps": 35625, "loss": 0.6525, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.879034782313786e-05, "epoch": 0.49964912280701756, "percentage": 9.99, "elapsed_time": "0:23:46", "remaining_time": "3:34:09"}
+{"current_steps": 3570, "total_steps": 35625, "loss": 0.633, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.878356002427532e-05, "epoch": 0.5010526315789474, "percentage": 10.02, "elapsed_time": "0:23:50", "remaining_time": "3:34:01"}
+{"current_steps": 3580, "total_steps": 35625, "loss": 0.6184, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.877675370909612e-05, "epoch": 0.5024561403508772, "percentage": 10.05, "elapsed_time": "0:23:54", "remaining_time": "3:33:56"}
+{"current_steps": 3590, "total_steps": 35625, "loss": 0.64, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.876992888289923e-05, "epoch": 0.503859649122807, "percentage": 10.08, "elapsed_time": "0:23:58", "remaining_time": "3:33:54"}
+{"current_steps": 3600, "total_steps": 35625, "loss": 0.6238, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.876308555099799e-05, "epoch": 0.5052631578947369, "percentage": 10.11, "elapsed_time": "0:24:01", "remaining_time": "3:33:45"}
+{"current_steps": 3610, "total_steps": 35625, "loss": 0.7433, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.875622371872017e-05, "epoch": 0.5066666666666667, "percentage": 10.13, "elapsed_time": "0:24:05", "remaining_time": "3:33:37"}
+{"current_steps": 3620, "total_steps": 35625, "loss": 0.7031, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.874934339140795e-05, "epoch": 0.5080701754385964, "percentage": 10.16, "elapsed_time": "0:24:10", "remaining_time": "3:33:42"}
+{"current_steps": 3630, "total_steps": 35625, "loss": 0.5443, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.8742444574417904e-05, "epoch": 0.5094736842105263, "percentage": 10.19, "elapsed_time": "0:24:14", "remaining_time": "3:33:38"}
+{"current_steps": 3640, "total_steps": 35625, "loss": 0.6728, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.873552727312099e-05, "epoch": 0.5108771929824562, "percentage": 10.22, "elapsed_time": "0:24:20", "remaining_time": "3:33:50"}
+{"current_steps": 3650, "total_steps": 35625, "loss": 0.6321, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.872859149290256e-05, "epoch": 0.512280701754386, "percentage": 10.25, "elapsed_time": "0:24:23", "remaining_time": "3:33:44"}
+{"current_steps": 3660, "total_steps": 35625, "loss": 0.6301, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.872163723916237e-05, "epoch": 0.5136842105263157, "percentage": 10.27, "elapsed_time": "0:24:29", "remaining_time": "3:33:50"}
+{"current_steps": 3670, "total_steps": 35625, "loss": 0.6991, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.871466451731453e-05, "epoch": 0.5150877192982456, "percentage": 10.3, "elapsed_time": "0:24:32", "remaining_time": "3:33:37"}
+{"current_steps": 3680, "total_steps": 35625, "loss": 0.5503, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.870767333278755e-05, "epoch": 0.5164912280701754, "percentage": 10.33, "elapsed_time": "0:24:37", "remaining_time": "3:33:46"}
+{"current_steps": 3690, "total_steps": 35625, "loss": 0.6342, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.87006636910243e-05, "epoch": 0.5178947368421053, "percentage": 10.36, "elapsed_time": "0:24:40", "remaining_time": "3:33:35"}
+{"current_steps": 3700, "total_steps": 35625, "loss": 0.5933, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.8693635597482045e-05, "epoch": 0.519298245614035, "percentage": 10.39, "elapsed_time": "0:24:43", "remaining_time": "3:33:24"}
+{"current_steps": 3710, "total_steps": 35625, "loss": 0.5878, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.868658905763238e-05, "epoch": 0.5207017543859649, "percentage": 10.41, "elapsed_time": "0:24:48", "remaining_time": "3:33:28"}
+{"current_steps": 3720, "total_steps": 35625, "loss": 0.5478, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.8679524076961284e-05, "epoch": 0.5221052631578947, "percentage": 10.44, "elapsed_time": "0:24:52", "remaining_time": "3:33:17"}
+{"current_steps": 3730, "total_steps": 35625, "loss": 0.6024, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.867244066096909e-05, "epoch": 0.5235087719298246, "percentage": 10.47, "elapsed_time": "0:24:56", "remaining_time": "3:33:13"}
+{"current_steps": 3740, "total_steps": 35625, "loss": 0.6106, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.866533881517046e-05, "epoch": 0.5249122807017544, "percentage": 10.5, "elapsed_time": "0:25:00", "remaining_time": "3:33:11"}
+{"current_steps": 3750, "total_steps": 35625, "loss": 0.602, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.865821854509445e-05, "epoch": 0.5263157894736842, "percentage": 10.53, "elapsed_time": "0:25:05", "remaining_time": "3:33:17"}
+{"current_steps": 3760, "total_steps": 35625, "loss": 0.7147, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.865107985628442e-05, "epoch": 0.527719298245614, "percentage": 10.55, "elapsed_time": "0:25:10", "remaining_time": "3:33:18"}
+{"current_steps": 3770, "total_steps": 35625, "loss": 0.6561, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.86439227542981e-05, "epoch": 0.5291228070175439, "percentage": 10.58, "elapsed_time": "0:25:13", "remaining_time": "3:33:06"}
+{"current_steps": 3780, "total_steps": 35625, "loss": 0.7062, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.863674724470751e-05, "epoch": 0.5305263157894737, "percentage": 10.61, "elapsed_time": "0:25:17", "remaining_time": "3:33:06"}
+{"current_steps": 3790, "total_steps": 35625, "loss": 0.7019, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.862955333309905e-05, "epoch": 0.5319298245614035, "percentage": 10.64, "elapsed_time": "0:25:22", "remaining_time": "3:33:08"}
+{"current_steps": 3800, "total_steps": 35625, "loss": 0.5702, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.8622341025073425e-05, "epoch": 0.5333333333333333, "percentage": 10.67, "elapsed_time": "0:25:26", "remaining_time": "3:33:01"}
+{"current_steps": 3810, "total_steps": 35625, "loss": 0.6956, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.861511032624567e-05, "epoch": 0.5347368421052632, "percentage": 10.69, "elapsed_time": "0:25:29", "remaining_time": "3:32:52"}
+{"current_steps": 3820, "total_steps": 35625, "loss": 0.6266, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.860786124224512e-05, "epoch": 0.536140350877193, "percentage": 10.72, "elapsed_time": "0:25:33", "remaining_time": "3:32:44"}
+{"current_steps": 3830, "total_steps": 35625, "loss": 0.6758, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.860059377871544e-05, "epoch": 0.5375438596491228, "percentage": 10.75, "elapsed_time": "0:25:36", "remaining_time": "3:32:38"}
+{"current_steps": 3840, "total_steps": 35625, "loss": 0.6559, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.85933079413146e-05, "epoch": 0.5389473684210526, "percentage": 10.78, "elapsed_time": "0:25:40", "remaining_time": "3:32:35"}
+{"current_steps": 3850, "total_steps": 35625, "loss": 0.6052, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.858600373571487e-05, "epoch": 0.5403508771929825, "percentage": 10.81, "elapsed_time": "0:25:44", "remaining_time": "3:32:23"}
+{"current_steps": 3860, "total_steps": 35625, "loss": 0.6119, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.8578681167602834e-05, "epoch": 0.5417543859649123, "percentage": 10.84, "elapsed_time": "0:25:47", "remaining_time": "3:32:16"}
+{"current_steps": 3870, "total_steps": 35625, "loss": 0.5872, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.8571340242679354e-05, "epoch": 0.5431578947368421, "percentage": 10.86, "elapsed_time": "0:25:50", "remaining_time": "3:32:04"}
+{"current_steps": 3880, "total_steps": 35625, "loss": 0.7302, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.856398096665959e-05, "epoch": 0.5445614035087719, "percentage": 10.89, "elapsed_time": "0:25:53", "remaining_time": "3:31:52"}
+{"current_steps": 3890, "total_steps": 35625, "loss": 0.6889, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.8556603345273e-05, "epoch": 0.5459649122807018, "percentage": 10.92, "elapsed_time": "0:25:56", "remaining_time": "3:31:41"}
+{"current_steps": 3900, "total_steps": 35625, "loss": 0.6045, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.8549207384263305e-05, "epoch": 0.5473684210526316, "percentage": 10.95, "elapsed_time": "0:26:00", "remaining_time": "3:31:34"}
+{"current_steps": 3910, "total_steps": 35625, "loss": 0.743, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.854179308938852e-05, "epoch": 0.5487719298245614, "percentage": 10.98, "elapsed_time": "0:26:03", "remaining_time": "3:31:24"}
+{"current_steps": 3920, "total_steps": 35625, "loss": 0.6416, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.8534360466420926e-05, "epoch": 0.5501754385964912, "percentage": 11.0, "elapsed_time": "0:26:06", "remaining_time": "3:31:13"}
+{"current_steps": 3930, "total_steps": 35625, "loss": 0.5975, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.852690952114708e-05, "epoch": 0.5515789473684211, "percentage": 11.03, "elapsed_time": "0:26:10", "remaining_time": "3:31:06"}
+{"current_steps": 3940, "total_steps": 35625, "loss": 0.7975, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.851944025936779e-05, "epoch": 0.5529824561403509, "percentage": 11.06, "elapsed_time": "0:26:13", "remaining_time": "3:30:56"}
+{"current_steps": 3950, "total_steps": 35625, "loss": 0.6836, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.851195268689813e-05, "epoch": 0.5543859649122806, "percentage": 11.09, "elapsed_time": "0:26:17", "remaining_time": "3:30:47"}
+{"current_steps": 3960, "total_steps": 35625, "loss": 0.5265, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.850444680956745e-05, "epoch": 0.5557894736842105, "percentage": 11.12, "elapsed_time": "0:26:20", "remaining_time": "3:30:40"}
+{"current_steps": 3970, "total_steps": 35625, "loss": 0.687, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.8496922633219314e-05, "epoch": 0.5571929824561404, "percentage": 11.14, "elapsed_time": "0:26:24", "remaining_time": "3:30:36"}
+{"current_steps": 3980, "total_steps": 35625, "loss": 0.5644, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.8489380163711556e-05, "epoch": 0.5585964912280702, "percentage": 11.17, "elapsed_time": "0:26:29", "remaining_time": "3:30:37"}
+{"current_steps": 3990, "total_steps": 35625, "loss": 0.6013, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.848181940691625e-05, "epoch": 0.56, "percentage": 11.2, "elapsed_time": "0:26:33", "remaining_time": "3:30:31"}
+{"current_steps": 4000, "total_steps": 35625, "loss": 0.724, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.8474240368719703e-05, "epoch": 0.5614035087719298, "percentage": 11.23, "elapsed_time": "0:26:38", "remaining_time": "3:30:35"}
+{"current_steps": 4000, "total_steps": 35625, "loss": null, "eval_loss": 0.6521075963973999, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": null, "epoch": 0.5614035087719298, "percentage": 11.23, "elapsed_time": "0:26:38", "remaining_time": "3:30:35"}
+{"current_steps": 4010, "total_steps": 35625, "loss": 0.6668, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.846664305502245e-05, "epoch": 0.5628070175438596, "percentage": 11.26, "elapsed_time": "0:27:26", "remaining_time": "3:36:20"}
+{"current_steps": 4020, "total_steps": 35625, "loss": 0.6898, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.8459027471739284e-05, "epoch": 0.5642105263157895, "percentage": 11.28, "elapsed_time": "0:27:29", "remaining_time": "3:36:08"}
+{"current_steps": 4030, "total_steps": 35625, "loss": 0.6639, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.8451393624799165e-05, "epoch": 0.5656140350877193, "percentage": 11.31, "elapsed_time": "0:27:32", "remaining_time": "3:35:58"}
+{"current_steps": 4040, "total_steps": 35625, "loss": 0.7336, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.844374152014532e-05, "epoch": 0.5670175438596491, "percentage": 11.34, "elapsed_time": "0:27:35", "remaining_time": "3:35:45"}
+{"current_steps": 4050, "total_steps": 35625, "loss": 0.6233, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.843607116373518e-05, "epoch": 0.5684210526315789, "percentage": 11.37, "elapsed_time": "0:27:40", "remaining_time": "3:35:47"}
+{"current_steps": 4060, "total_steps": 35625, "loss": 0.6178, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.8428382561540366e-05, "epoch": 0.5698245614035088, "percentage": 11.4, "elapsed_time": "0:27:44", "remaining_time": "3:35:43"}
+{"current_steps": 4070, "total_steps": 35625, "loss": 0.6121, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.8420675719546723e-05, "epoch": 0.5712280701754386, "percentage": 11.42, "elapsed_time": "0:27:48", "remaining_time": "3:35:37"}
+{"current_steps": 4080, "total_steps": 35625, "loss": 0.6225, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.8412950643754305e-05, "epoch": 0.5726315789473684, "percentage": 11.45, "elapsed_time": "0:27:51", "remaining_time": "3:35:24"}
+{"current_steps": 4090, "total_steps": 35625, "loss": 0.6502, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.840520734017734e-05, "epoch": 0.5740350877192982, "percentage": 11.48, "elapsed_time": "0:27:55", "remaining_time": "3:35:17"}
+{"current_steps": 4100, "total_steps": 35625, "loss": 0.6799, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.839744581484425e-05, "epoch": 0.5754385964912281, "percentage": 11.51, "elapsed_time": "0:27:58", "remaining_time": "3:35:07"}
+{"current_steps": 4110, "total_steps": 35625, "loss": 0.7671, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.8389666073797646e-05, "epoch": 0.5768421052631579, "percentage": 11.54, "elapsed_time": "0:28:01", "remaining_time": "3:34:56"}
+{"current_steps": 4120, "total_steps": 35625, "loss": 0.6289, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.8381868123094335e-05, "epoch": 0.5782456140350877, "percentage": 11.56, "elapsed_time": "0:28:07", "remaining_time": "3:35:01"}
+{"current_steps": 4130, "total_steps": 35625, "loss": 0.6621, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.837405196880529e-05, "epoch": 0.5796491228070175, "percentage": 11.59, "elapsed_time": "0:28:11", "remaining_time": "3:35:02"}
+{"current_steps": 4140, "total_steps": 35625, "loss": 0.601, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.836621761701564e-05, "epoch": 0.5810526315789474, "percentage": 11.62, "elapsed_time": "0:28:15", "remaining_time": "3:34:57"}
+{"current_steps": 4150, "total_steps": 35625, "loss": 0.6818, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.835836507382471e-05, "epoch": 0.5824561403508772, "percentage": 11.65, "elapsed_time": "0:28:19", "remaining_time": "3:34:50"}
+{"current_steps": 4160, "total_steps": 35625, "loss": 0.6688, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.835049434534596e-05, "epoch": 0.583859649122807, "percentage": 11.68, "elapsed_time": "0:28:24", "remaining_time": "3:34:50"}
+{"current_steps": 4170, "total_steps": 35625, "loss": 0.7652, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.8342605437707034e-05, "epoch": 0.5852631578947368, "percentage": 11.71, "elapsed_time": "0:28:27", "remaining_time": "3:34:43"}
+{"current_steps": 4180, "total_steps": 35625, "loss": 0.5381, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.8334698357049715e-05, "epoch": 0.5866666666666667, "percentage": 11.73, "elapsed_time": "0:28:30", "remaining_time": "3:34:31"}
+{"current_steps": 4190, "total_steps": 35625, "loss": 0.6786, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.832677310952993e-05, "epoch": 0.5880701754385965, "percentage": 11.76, "elapsed_time": "0:28:34", "remaining_time": "3:34:21"}
+{"current_steps": 4200, "total_steps": 35625, "loss": 0.629, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.831882970131777e-05, "epoch": 0.5894736842105263, "percentage": 11.79, "elapsed_time": "0:28:39", "remaining_time": "3:34:23"}
+{"current_steps": 4210, "total_steps": 35625, "loss": 0.6021, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.831086813859743e-05, "epoch": 0.5908771929824561, "percentage": 11.82, "elapsed_time": "0:28:42", "remaining_time": "3:34:15"}
+{"current_steps": 4220, "total_steps": 35625, "loss": 0.5706, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.830288842756728e-05, "epoch": 0.592280701754386, "percentage": 11.85, "elapsed_time": "0:28:45", "remaining_time": "3:34:04"}
+{"current_steps": 4230, "total_steps": 35625, "loss": 0.6716, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.8294890574439784e-05, "epoch": 0.5936842105263158, "percentage": 11.87, "elapsed_time": "0:28:48", "remaining_time": "3:33:51"}
+{"current_steps": 4240, "total_steps": 35625, "loss": 0.6315, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.828687458544155e-05, "epoch": 0.5950877192982457, "percentage": 11.9, "elapsed_time": "0:28:52", "remaining_time": "3:33:44"}
+{"current_steps": 4250, "total_steps": 35625, "loss": 0.5633, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.82788404668133e-05, "epoch": 0.5964912280701754, "percentage": 11.93, "elapsed_time": "0:28:56", "remaining_time": "3:33:40"}
+{"current_steps": 4260, "total_steps": 35625, "loss": 0.6747, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.827078822480987e-05, "epoch": 0.5978947368421053, "percentage": 11.96, "elapsed_time": "0:29:00", "remaining_time": "3:33:31"}
+{"current_steps": 4270, "total_steps": 35625, "loss": 0.6555, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.826271786570021e-05, "epoch": 0.5992982456140351, "percentage": 11.99, "elapsed_time": "0:29:04", "remaining_time": "3:33:28"}
+{"current_steps": 4280, "total_steps": 35625, "loss": 0.5987, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.825462939576737e-05, "epoch": 0.600701754385965, "percentage": 12.01, "elapsed_time": "0:29:07", "remaining_time": "3:33:17"}
+{"current_steps": 4290, "total_steps": 35625, "loss": 0.6753, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.8246522821308495e-05, "epoch": 0.6021052631578947, "percentage": 12.04, "elapsed_time": "0:29:13", "remaining_time": "3:33:25"}
+{"current_steps": 4300, "total_steps": 35625, "loss": 0.5856, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.823839814863484e-05, "epoch": 0.6035087719298246, "percentage": 12.07, "elapsed_time": "0:29:17", "remaining_time": "3:33:21"}
+{"current_steps": 4310, "total_steps": 35625, "loss": 0.6204, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.823025538407173e-05, "epoch": 0.6049122807017544, "percentage": 12.1, "elapsed_time": "0:29:20", "remaining_time": "3:33:12"}
+{"current_steps": 4320, "total_steps": 35625, "loss": 0.5866, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.82220945339586e-05, "epoch": 0.6063157894736843, "percentage": 12.13, "elapsed_time": "0:29:24", "remaining_time": "3:33:07"}
+{"current_steps": 4330, "total_steps": 35625, "loss": 0.7384, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.8213915604648944e-05, "epoch": 0.607719298245614, "percentage": 12.15, "elapsed_time": "0:29:27", "remaining_time": "3:32:55"}
+{"current_steps": 4340, "total_steps": 35625, "loss": 0.7113, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.820571860251034e-05, "epoch": 0.6091228070175438, "percentage": 12.18, "elapsed_time": "0:29:30", "remaining_time": "3:32:44"}
+{"current_steps": 4350, "total_steps": 35625, "loss": 0.6544, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.819750353392443e-05, "epoch": 0.6105263157894737, "percentage": 12.21, "elapsed_time": "0:29:34", "remaining_time": "3:32:34"}
+{"current_steps": 4360, "total_steps": 35625, "loss": 0.6317, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.818927040528693e-05, "epoch": 0.6119298245614035, "percentage": 12.24, "elapsed_time": "0:29:37", "remaining_time": "3:32:25"}
+{"current_steps": 4370, "total_steps": 35625, "loss": 0.5756, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.818101922300762e-05, "epoch": 0.6133333333333333, "percentage": 12.27, "elapsed_time": "0:29:40", "remaining_time": "3:32:16"}
+{"current_steps": 4380, "total_steps": 35625, "loss": 0.687, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.8172749993510315e-05, "epoch": 0.6147368421052631, "percentage": 12.29, "elapsed_time": "0:29:44", "remaining_time": "3:32:13"}
+{"current_steps": 4390, "total_steps": 35625, "loss": 0.6573, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.81644627232329e-05, "epoch": 0.616140350877193, "percentage": 12.32, "elapsed_time": "0:29:48", "remaining_time": "3:32:05"}
+{"current_steps": 4400, "total_steps": 35625, "loss": 0.6787, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.81561574186273e-05, "epoch": 0.6175438596491228, "percentage": 12.35, "elapsed_time": "0:29:51", "remaining_time": "3:31:54"}
+{"current_steps": 4410, "total_steps": 35625, "loss": 0.5503, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.814783408615948e-05, "epoch": 0.6189473684210526, "percentage": 12.38, "elapsed_time": "0:29:55", "remaining_time": "3:31:51"}
+{"current_steps": 4420, "total_steps": 35625, "loss": 0.6495, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.813949273230944e-05, "epoch": 0.6203508771929824, "percentage": 12.41, "elapsed_time": "0:30:00", "remaining_time": "3:31:54"}
+{"current_steps": 4430, "total_steps": 35625, "loss": 0.5845, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.8131133363571214e-05, "epoch": 0.6217543859649123, "percentage": 12.44, "elapsed_time": "0:30:04", "remaining_time": "3:31:47"}
+{"current_steps": 4440, "total_steps": 35625, "loss": 0.6093, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.8122755986452845e-05, "epoch": 0.6231578947368421, "percentage": 12.46, "elapsed_time": "0:30:07", "remaining_time": "3:31:37"}
+{"current_steps": 4450, "total_steps": 35625, "loss": 0.6465, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.8114360607476416e-05, "epoch": 0.624561403508772, "percentage": 12.49, "elapsed_time": "0:30:12", "remaining_time": "3:31:36"}
+{"current_steps": 4460, "total_steps": 35625, "loss": 0.6228, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.810594723317801e-05, "epoch": 0.6259649122807017, "percentage": 12.52, "elapsed_time": "0:30:15", "remaining_time": "3:31:26"}
+{"current_steps": 4470, "total_steps": 35625, "loss": 0.657, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.809751587010774e-05, "epoch": 0.6273684210526316, "percentage": 12.55, "elapsed_time": "0:30:18", "remaining_time": "3:31:15"}
+{"current_steps": 4480, "total_steps": 35625, "loss": 0.6068, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.80890665248297e-05, "epoch": 0.6287719298245614, "percentage": 12.58, "elapsed_time": "0:30:22", "remaining_time": "3:31:07"}
+{"current_steps": 4490, "total_steps": 35625, "loss": 0.7177, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.808059920392201e-05, "epoch": 0.6301754385964913, "percentage": 12.6, "elapsed_time": "0:30:26", "remaining_time": "3:31:03"}
+{"current_steps": 4500, "total_steps": 35625, "loss": 0.6333, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.807211391397674e-05, "epoch": 0.631578947368421, "percentage": 12.63, "elapsed_time": "0:30:29", "remaining_time": "3:30:52"}
+{"current_steps": 4510, "total_steps": 35625, "loss": 0.6396, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.806361066160001e-05, "epoch": 0.6329824561403509, "percentage": 12.66, "elapsed_time": "0:30:35", "remaining_time": "3:31:03"}
+{"current_steps": 4520, "total_steps": 35625, "loss": 0.622, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.8055089453411875e-05, "epoch": 0.6343859649122807, "percentage": 12.69, "elapsed_time": "0:30:38", "remaining_time": "3:30:53"}
+{"current_steps": 4530, "total_steps": 35625, "loss": 0.6551, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.80465502960464e-05, "epoch": 0.6357894736842106, "percentage": 12.72, "elapsed_time": "0:30:43", "remaining_time": "3:30:54"}
+{"current_steps": 4540, "total_steps": 35625, "loss": 0.6874, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.80379931961516e-05, "epoch": 0.6371929824561403, "percentage": 12.74, "elapsed_time": "0:30:46", "remaining_time": "3:30:44"}
+{"current_steps": 4550, "total_steps": 35625, "loss": 0.5982, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.8029418160389484e-05, "epoch": 0.6385964912280702, "percentage": 12.77, "elapsed_time": "0:30:49", "remaining_time": "3:30:33"}
+{"current_steps": 4560, "total_steps": 35625, "loss": 0.5541, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.8020825195435994e-05, "epoch": 0.64, "percentage": 12.8, "elapsed_time": "0:30:53", "remaining_time": "3:30:25"}
+{"current_steps": 4570, "total_steps": 35625, "loss": 0.7297, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.8012214307981064e-05, "epoch": 0.6414035087719299, "percentage": 12.83, "elapsed_time": "0:30:58", "remaining_time": "3:30:27"}
+{"current_steps": 4580, "total_steps": 35625, "loss": 0.7694, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.800358550472855e-05, "epoch": 0.6428070175438596, "percentage": 12.86, "elapsed_time": "0:31:01", "remaining_time": "3:30:18"}
+{"current_steps": 4590, "total_steps": 35625, "loss": 0.6194, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.799493879239628e-05, "epoch": 0.6442105263157895, "percentage": 12.88, "elapsed_time": "0:31:04", "remaining_time": "3:30:08"}
+{"current_steps": 4600, "total_steps": 35625, "loss": 0.6358, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.7986274177716024e-05, "epoch": 0.6456140350877193, "percentage": 12.91, "elapsed_time": "0:31:08", "remaining_time": "3:30:01"}
+{"current_steps": 4610, "total_steps": 35625, "loss": 0.6828, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.797759166743346e-05, "epoch": 0.6470175438596492, "percentage": 12.94, "elapsed_time": "0:31:12", "remaining_time": "3:29:57"}
+{"current_steps": 4620, "total_steps": 35625, "loss": 0.7163, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.7968891268308246e-05, "epoch": 0.6484210526315789, "percentage": 12.97, "elapsed_time": "0:31:16", "remaining_time": "3:29:52"}
+{"current_steps": 4630, "total_steps": 35625, "loss": 0.5935, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.796017298711391e-05, "epoch": 0.6498245614035087, "percentage": 13.0, "elapsed_time": "0:31:19", "remaining_time": "3:29:43"}
+{"current_steps": 4640, "total_steps": 35625, "loss": 0.5696, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.795143683063797e-05, "epoch": 0.6512280701754386, "percentage": 13.02, "elapsed_time": "0:31:22", "remaining_time": "3:29:33"}
+{"current_steps": 4650, "total_steps": 35625, "loss": 0.5665, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.7942682805681797e-05, "epoch": 0.6526315789473685, "percentage": 13.05, "elapsed_time": "0:31:25", "remaining_time": "3:29:21"}
+{"current_steps": 4660, "total_steps": 35625, "loss": 0.6242, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.79339109190607e-05, "epoch": 0.6540350877192982, "percentage": 13.08, "elapsed_time": "0:31:29", "remaining_time": "3:29:17"}
+{"current_steps": 4670, "total_steps": 35625, "loss": 0.6259, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.792512117760391e-05, "epoch": 0.655438596491228, "percentage": 13.11, "elapsed_time": "0:31:32", "remaining_time": "3:29:06"}
+{"current_steps": 4680, "total_steps": 35625, "loss": 0.6757, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.7916313588154514e-05, "epoch": 0.6568421052631579, "percentage": 13.14, "elapsed_time": "0:31:36", "remaining_time": "3:28:58"}
+{"current_steps": 4690, "total_steps": 35625, "loss": 0.6324, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.790748815756954e-05, "epoch": 0.6582456140350877, "percentage": 13.16, "elapsed_time": "0:31:40", "remaining_time": "3:28:54"}
+{"current_steps": 4700, "total_steps": 35625, "loss": 0.5834, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.78986448927199e-05, "epoch": 0.6596491228070176, "percentage": 13.19, "elapsed_time": "0:31:44", "remaining_time": "3:28:51"}
+{"current_steps": 4710, "total_steps": 35625, "loss": 0.6985, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.788978380049036e-05, "epoch": 0.6610526315789473, "percentage": 13.22, "elapsed_time": "0:31:47", "remaining_time": "3:28:41"}
+{"current_steps": 4720, "total_steps": 35625, "loss": 0.6595, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.78809048877796e-05, "epoch": 0.6624561403508772, "percentage": 13.25, "elapsed_time": "0:31:51", "remaining_time": "3:28:38"}
+{"current_steps": 4730, "total_steps": 35625, "loss": 0.7508, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.787200816150014e-05, "epoch": 0.663859649122807, "percentage": 13.28, "elapsed_time": "0:31:55", "remaining_time": "3:28:34"}
+{"current_steps": 4740, "total_steps": 35625, "loss": 0.6452, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.786309362857839e-05, "epoch": 0.6652631578947369, "percentage": 13.31, "elapsed_time": "0:31:59", "remaining_time": "3:28:29"}
+{"current_steps": 4750, "total_steps": 35625, "loss": 0.6171, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.785416129595463e-05, "epoch": 0.6666666666666666, "percentage": 13.33, "elapsed_time": "0:32:03", "remaining_time": "3:28:25"}
+{"current_steps": 4760, "total_steps": 35625, "loss": 0.654, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.784521117058298e-05, "epoch": 0.6680701754385965, "percentage": 13.36, "elapsed_time": "0:32:08", "remaining_time": "3:28:25"}
+{"current_steps": 4770, "total_steps": 35625, "loss": 0.6674, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.7836243259431425e-05, "epoch": 0.6694736842105263, "percentage": 13.39, "elapsed_time": "0:32:12", "remaining_time": "3:28:17"}
+{"current_steps": 4780, "total_steps": 35625, "loss": 0.6319, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.7827257569481776e-05, "epoch": 0.6708771929824562, "percentage": 13.42, "elapsed_time": "0:32:15", "remaining_time": "3:28:12"}
+{"current_steps": 4790, "total_steps": 35625, "loss": 0.5856, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.781825410772972e-05, "epoch": 0.6722807017543859, "percentage": 13.45, "elapsed_time": "0:32:19", "remaining_time": "3:28:07"}
+{"current_steps": 4800, "total_steps": 35625, "loss": 0.5919, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.780923288118475e-05, "epoch": 0.6736842105263158, "percentage": 13.47, "elapsed_time": "0:32:23", "remaining_time": "3:28:03"}
+{"current_steps": 4810, "total_steps": 35625, "loss": 0.6192, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.78001938968702e-05, "epoch": 0.6750877192982456, "percentage": 13.5, "elapsed_time": "0:32:27", "remaining_time": "3:27:56"}
+{"current_steps": 4820, "total_steps": 35625, "loss": 0.6628, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.779113716182323e-05, "epoch": 0.6764912280701755, "percentage": 13.53, "elapsed_time": "0:32:30", "remaining_time": "3:27:48"}
+{"current_steps": 4830, "total_steps": 35625, "loss": 0.6451, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.778206268309482e-05, "epoch": 0.6778947368421052, "percentage": 13.56, "elapsed_time": "0:32:35", "remaining_time": "3:27:50"}
+{"current_steps": 4840, "total_steps": 35625, "loss": 0.6823, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.777297046774977e-05, "epoch": 0.6792982456140351, "percentage": 13.59, "elapsed_time": "0:32:41", "remaining_time": "3:27:53"}
+{"current_steps": 4850, "total_steps": 35625, "loss": 0.6916, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.7763860522866665e-05, "epoch": 0.6807017543859649, "percentage": 13.61, "elapsed_time": "0:32:44", "remaining_time": "3:27:48"}
+{"current_steps": 4860, "total_steps": 35625, "loss": 0.5936, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.775473285553792e-05, "epoch": 0.6821052631578948, "percentage": 13.64, "elapsed_time": "0:32:48", "remaining_time": "3:27:40"}
+{"current_steps": 4870, "total_steps": 35625, "loss": 0.7202, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.774558747286973e-05, "epoch": 0.6835087719298245, "percentage": 13.67, "elapsed_time": "0:32:51", "remaining_time": "3:27:32"}
+{"current_steps": 4880, "total_steps": 35625, "loss": 0.6405, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.77364243819821e-05, "epoch": 0.6849122807017544, "percentage": 13.7, "elapsed_time": "0:32:55", "remaining_time": "3:27:23"}
+{"current_steps": 4890, "total_steps": 35625, "loss": 0.6704, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.7727243590008806e-05, "epoch": 0.6863157894736842, "percentage": 13.73, "elapsed_time": "0:32:59", "remaining_time": "3:27:18"}
+{"current_steps": 4900, "total_steps": 35625, "loss": 0.6304, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.771804510409741e-05, "epoch": 0.6877192982456141, "percentage": 13.75, "elapsed_time": "0:33:02", "remaining_time": "3:27:13"}
+{"current_steps": 4910, "total_steps": 35625, "loss": 0.6645, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.7708828931409236e-05, "epoch": 0.6891228070175439, "percentage": 13.78, "elapsed_time": "0:33:06", "remaining_time": "3:27:09"}
+{"current_steps": 4920, "total_steps": 35625, "loss": 0.7018, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.769959507911941e-05, "epoch": 0.6905263157894737, "percentage": 13.81, "elapsed_time": "0:33:10", "remaining_time": "3:26:59"}
+{"current_steps": 4930, "total_steps": 35625, "loss": 0.5191, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.769034355441678e-05, "epoch": 0.6919298245614035, "percentage": 13.84, "elapsed_time": "0:33:13", "remaining_time": "3:26:51"}
+{"current_steps": 4940, "total_steps": 35625, "loss": 0.6723, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.7681074364503995e-05, "epoch": 0.6933333333333334, "percentage": 13.87, "elapsed_time": "0:33:16", "remaining_time": "3:26:41"}
+{"current_steps": 4950, "total_steps": 35625, "loss": 0.7069, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.767178751659743e-05, "epoch": 0.6947368421052632, "percentage": 13.89, "elapsed_time": "0:33:19", "remaining_time": "3:26:30"}
+{"current_steps": 4960, "total_steps": 35625, "loss": 0.6333, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.7662483017927215e-05, "epoch": 0.696140350877193, "percentage": 13.92, "elapsed_time": "0:33:22", "remaining_time": "3:26:21"}
+{"current_steps": 4970, "total_steps": 35625, "loss": 0.7116, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.765316087573722e-05, "epoch": 0.6975438596491228, "percentage": 13.95, "elapsed_time": "0:33:26", "remaining_time": "3:26:18"}
+{"current_steps": 4980, "total_steps": 35625, "loss": 0.5517, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.7643821097285044e-05, "epoch": 0.6989473684210527, "percentage": 13.98, "elapsed_time": "0:33:31", "remaining_time": "3:26:19"}
+{"current_steps": 4990, "total_steps": 35625, "loss": 0.6856, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.763446368984205e-05, "epoch": 0.7003508771929825, "percentage": 14.01, "elapsed_time": "0:33:36", "remaining_time": "3:26:19"}
+{"current_steps": 5000, "total_steps": 35625, "loss": 0.6119, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.762508866069327e-05, "epoch": 0.7017543859649122, "percentage": 14.04, "elapsed_time": "0:33:39", "remaining_time": "3:26:10"}
+{"current_steps": 5010, "total_steps": 35625, "loss": 0.5645, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.7615696017137504e-05, "epoch": 0.7031578947368421, "percentage": 14.06, "elapsed_time": "0:33:43", "remaining_time": "3:26:08"}
+{"current_steps": 5020, "total_steps": 35625, "loss": 0.6506, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.760628576648723e-05, "epoch": 0.7045614035087719, "percentage": 14.09, "elapsed_time": "0:33:47", "remaining_time": "3:25:58"}
+{"current_steps": 5030, "total_steps": 35625, "loss": 0.6092, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.759685791606868e-05, "epoch": 0.7059649122807018, "percentage": 14.12, "elapsed_time": "0:33:50", "remaining_time": "3:25:52"}
+{"current_steps": 5040, "total_steps": 35625, "loss": 0.7659, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.758741247322174e-05, "epoch": 0.7073684210526315, "percentage": 14.15, "elapsed_time": "0:33:55", "remaining_time": "3:25:50"}
+{"current_steps": 5050, "total_steps": 35625, "loss": 0.5774, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.7577949445300004e-05, "epoch": 0.7087719298245614, "percentage": 14.18, "elapsed_time": "0:33:59", "remaining_time": "3:25:45"}
+{"current_steps": 5060, "total_steps": 35625, "loss": 0.6234, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.756846883967077e-05, "epoch": 0.7101754385964912, "percentage": 14.2, "elapsed_time": "0:34:04", "remaining_time": "3:25:50"}
+{"current_steps": 5070, "total_steps": 35625, "loss": 0.6456, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.755897066371502e-05, "epoch": 0.7115789473684211, "percentage": 14.23, "elapsed_time": "0:34:07", "remaining_time": "3:25:42"}
+{"current_steps": 5080, "total_steps": 35625, "loss": 0.54, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.754945492482741e-05, "epoch": 0.7129824561403508, "percentage": 14.26, "elapsed_time": "0:34:11", "remaining_time": "3:25:33"}
+{"current_steps": 5090, "total_steps": 35625, "loss": 0.5695, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.7539921630416264e-05, "epoch": 0.7143859649122807, "percentage": 14.29, "elapsed_time": "0:34:14", "remaining_time": "3:25:25"}
+{"current_steps": 5100, "total_steps": 35625, "loss": 0.6748, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.7530370787903576e-05, "epoch": 0.7157894736842105, "percentage": 14.32, "elapsed_time": "0:34:17", "remaining_time": "3:25:17"}
+{"current_steps": 5110, "total_steps": 35625, "loss": 0.595, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.7520802404725007e-05, "epoch": 0.7171929824561404, "percentage": 14.34, "elapsed_time": "0:34:21", "remaining_time": "3:25:13"}
+{"current_steps": 5120, "total_steps": 35625, "loss": 0.641, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.751121648832987e-05, "epoch": 0.7185964912280701, "percentage": 14.37, "elapsed_time": "0:34:26", "remaining_time": "3:25:10"}
+{"current_steps": 5130, "total_steps": 35625, "loss": 0.6345, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.750161304618114e-05, "epoch": 0.72, "percentage": 14.4, "elapsed_time": "0:34:29", "remaining_time": "3:25:03"}
+{"current_steps": 5140, "total_steps": 35625, "loss": 0.5997, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.749199208575541e-05, "epoch": 0.7214035087719298, "percentage": 14.43, "elapsed_time": "0:34:33", "remaining_time": "3:25:00"}
+{"current_steps": 5150, "total_steps": 35625, "loss": 0.6529, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.748235361454293e-05, "epoch": 0.7228070175438597, "percentage": 14.46, "elapsed_time": "0:34:37", "remaining_time": "3:24:51"}
+{"current_steps": 5160, "total_steps": 35625, "loss": 0.5668, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.7472697640047594e-05, "epoch": 0.7242105263157895, "percentage": 14.48, "elapsed_time": "0:34:40", "remaining_time": "3:24:45"}
+{"current_steps": 5170, "total_steps": 35625, "loss": 0.6433, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.7463024169786895e-05, "epoch": 0.7256140350877193, "percentage": 14.51, "elapsed_time": "0:34:45", "remaining_time": "3:24:42"}
+{"current_steps": 5180, "total_steps": 35625, "loss": 0.6749, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.745333321129197e-05, "epoch": 0.7270175438596491, "percentage": 14.54, "elapsed_time": "0:34:48", "remaining_time": "3:24:37"}
+{"current_steps": 5190, "total_steps": 35625, "loss": 0.7041, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.744362477210755e-05, "epoch": 0.728421052631579, "percentage": 14.57, "elapsed_time": "0:34:52", "remaining_time": "3:24:31"}
+{"current_steps": 5200, "total_steps": 35625, "loss": 0.5598, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.7433898859792e-05, "epoch": 0.7298245614035088, "percentage": 14.6, "elapsed_time": "0:34:58", "remaining_time": "3:24:36"}
+{"current_steps": 5210, "total_steps": 35625, "loss": 0.6433, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.742415548191728e-05, "epoch": 0.7312280701754386, "percentage": 14.62, "elapsed_time": "0:35:02", "remaining_time": "3:24:31"}
+{"current_steps": 5220, "total_steps": 35625, "loss": 0.6715, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.741439464606893e-05, "epoch": 0.7326315789473684, "percentage": 14.65, "elapsed_time": "0:35:05", "remaining_time": "3:24:26"}
+{"current_steps": 5230, "total_steps": 35625, "loss": 0.6391, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.740461635984609e-05, "epoch": 0.7340350877192983, "percentage": 14.68, "elapsed_time": "0:35:09", "remaining_time": "3:24:20"}
+{"current_steps": 5240, "total_steps": 35625, "loss": 0.5834, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.739482063086152e-05, "epoch": 0.7354385964912281, "percentage": 14.71, "elapsed_time": "0:35:13", "remaining_time": "3:24:16"}
+{"current_steps": 5250, "total_steps": 35625, "loss": 0.7835, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.73850074667415e-05, "epoch": 0.7368421052631579, "percentage": 14.74, "elapsed_time": "0:35:16", "remaining_time": "3:24:07"}
+{"current_steps": 5260, "total_steps": 35625, "loss": 0.6128, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.737517687512593e-05, "epoch": 0.7382456140350877, "percentage": 14.76, "elapsed_time": "0:35:20", "remaining_time": "3:24:03"}
+{"current_steps": 5270, "total_steps": 35625, "loss": 0.655, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.7365328863668256e-05, "epoch": 0.7396491228070176, "percentage": 14.79, "elapsed_time": "0:35:25", "remaining_time": "3:24:00"}
+{"current_steps": 5280, "total_steps": 35625, "loss": 0.6506, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.735546344003551e-05, "epoch": 0.7410526315789474, "percentage": 14.82, "elapsed_time": "0:35:28", "remaining_time": "3:23:51"}
+{"current_steps": 5290, "total_steps": 35625, "loss": 0.6984, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.734558061190824e-05, "epoch": 0.7424561403508771, "percentage": 14.85, "elapsed_time": "0:35:31", "remaining_time": "3:23:45"}
+{"current_steps": 5300, "total_steps": 35625, "loss": 0.7401, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.733568038698057e-05, "epoch": 0.743859649122807, "percentage": 14.88, "elapsed_time": "0:35:34", "remaining_time": "3:23:35"}
+{"current_steps": 5310, "total_steps": 35625, "loss": 0.6432, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.732576277296017e-05, "epoch": 0.7452631578947368, "percentage": 14.91, "elapsed_time": "0:35:40", "remaining_time": "3:23:38"}
+{"current_steps": 5320, "total_steps": 35625, "loss": 0.6687, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.731582777756825e-05, "epoch": 0.7466666666666667, "percentage": 14.93, "elapsed_time": "0:35:43", "remaining_time": "3:23:32"}
+{"current_steps": 5330, "total_steps": 35625, "loss": 0.6489, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.730587540853954e-05, "epoch": 0.7480701754385964, "percentage": 14.96, "elapsed_time": "0:35:48", "remaining_time": "3:23:30"}
+{"current_steps": 5340, "total_steps": 35625, "loss": 0.6149, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.729590567362228e-05, "epoch": 0.7494736842105263, "percentage": 14.99, "elapsed_time": "0:35:54", "remaining_time": "3:23:36"}
+{"current_steps": 5350, "total_steps": 35625, "loss": 0.6227, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.728591858057827e-05, "epoch": 0.7508771929824561, "percentage": 15.02, "elapsed_time": "0:35:57", "remaining_time": "3:23:31"}
+{"current_steps": 5360, "total_steps": 35625, "loss": 0.712, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.727591413718282e-05, "epoch": 0.752280701754386, "percentage": 15.05, "elapsed_time": "0:36:01", "remaining_time": "3:23:26"}
+{"current_steps": 5370, "total_steps": 35625, "loss": 0.7172, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.7265892351224694e-05, "epoch": 0.7536842105263157, "percentage": 15.07, "elapsed_time": "0:36:07", "remaining_time": "3:23:34"}
+{"current_steps": 5380, "total_steps": 35625, "loss": 0.6812, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.725585323050623e-05, "epoch": 0.7550877192982456, "percentage": 15.1, "elapsed_time": "0:36:11", "remaining_time": "3:23:30"}
+{"current_steps": 5390, "total_steps": 35625, "loss": 0.6266, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.72457967828432e-05, "epoch": 0.7564912280701754, "percentage": 15.13, "elapsed_time": "0:36:15", "remaining_time": "3:23:25"}
+{"current_steps": 5400, "total_steps": 35625, "loss": 0.6976, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.723572301606492e-05, "epoch": 0.7578947368421053, "percentage": 15.16, "elapsed_time": "0:36:19", "remaining_time": "3:23:20"}
+{"current_steps": 5410, "total_steps": 35625, "loss": 0.709, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.7225631938014134e-05, "epoch": 0.7592982456140351, "percentage": 15.19, "elapsed_time": "0:36:23", "remaining_time": "3:23:12"}
+{"current_steps": 5420, "total_steps": 35625, "loss": 0.5956, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.7215523556547116e-05, "epoch": 0.7607017543859649, "percentage": 15.21, "elapsed_time": "0:36:26", "remaining_time": "3:23:03"}
+{"current_steps": 5430, "total_steps": 35625, "loss": 0.6943, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.720539787953357e-05, "epoch": 0.7621052631578947, "percentage": 15.24, "elapsed_time": "0:36:30", "remaining_time": "3:22:59"}
+{"current_steps": 5440, "total_steps": 35625, "loss": 0.6322, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.71952549148567e-05, "epoch": 0.7635087719298246, "percentage": 15.27, "elapsed_time": "0:36:35", "remaining_time": "3:23:03"}
+{"current_steps": 5450, "total_steps": 35625, "loss": 0.6258, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.7185094670413134e-05, "epoch": 0.7649122807017544, "percentage": 15.3, "elapsed_time": "0:36:39", "remaining_time": "3:22:55"}
+{"current_steps": 5460, "total_steps": 35625, "loss": 0.6347, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.7174917154112984e-05, "epoch": 0.7663157894736842, "percentage": 15.33, "elapsed_time": "0:36:42", "remaining_time": "3:22:48"}
+{"current_steps": 5470, "total_steps": 35625, "loss": 0.6423, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.716472237387979e-05, "epoch": 0.767719298245614, "percentage": 15.35, "elapsed_time": "0:36:47", "remaining_time": "3:22:47"}
+{"current_steps": 5480, "total_steps": 35625, "loss": 0.6614, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.715451033765054e-05, "epoch": 0.7691228070175439, "percentage": 15.38, "elapsed_time": "0:36:51", "remaining_time": "3:22:42"}
+{"current_steps": 5490, "total_steps": 35625, "loss": 0.6326, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.714428105337565e-05, "epoch": 0.7705263157894737, "percentage": 15.41, "elapsed_time": "0:36:54", "remaining_time": "3:22:33"}
+{"current_steps": 5500, "total_steps": 35625, "loss": 0.6146, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.713403452901898e-05, "epoch": 0.7719298245614035, "percentage": 15.44, "elapsed_time": "0:36:59", "remaining_time": "3:22:37"}
+{"current_steps": 5510, "total_steps": 35625, "loss": 0.6061, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.7123770772557774e-05, "epoch": 0.7733333333333333, "percentage": 15.47, "elapsed_time": "0:37:04", "remaining_time": "3:22:37"}
+{"current_steps": 5520, "total_steps": 35625, "loss": 0.7423, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.711348979198274e-05, "epoch": 0.7747368421052632, "percentage": 15.49, "elapsed_time": "0:37:07", "remaining_time": "3:22:31"}
+{"current_steps": 5530, "total_steps": 35625, "loss": 0.6648, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.710319159529798e-05, "epoch": 0.776140350877193, "percentage": 15.52, "elapsed_time": "0:37:12", "remaining_time": "3:22:27"}
+{"current_steps": 5540, "total_steps": 35625, "loss": 0.551, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.709287619052098e-05, "epoch": 0.7775438596491228, "percentage": 15.55, "elapsed_time": "0:37:15", "remaining_time": "3:22:20"}
+{"current_steps": 5550, "total_steps": 35625, "loss": 0.7394, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.708254358568264e-05, "epoch": 0.7789473684210526, "percentage": 15.58, "elapsed_time": "0:37:18", "remaining_time": "3:22:12"}
+{"current_steps": 5560, "total_steps": 35625, "loss": 0.57, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.7072193788827236e-05, "epoch": 0.7803508771929825, "percentage": 15.61, "elapsed_time": "0:37:23", "remaining_time": "3:22:09"}
+{"current_steps": 5570, "total_steps": 35625, "loss": 0.6293, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.706182680801245e-05, "epoch": 0.7817543859649123, "percentage": 15.64, "elapsed_time": "0:37:26", "remaining_time": "3:22:03"}
+{"current_steps": 5580, "total_steps": 35625, "loss": 0.6007, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.705144265130934e-05, "epoch": 0.783157894736842, "percentage": 15.66, "elapsed_time": "0:37:31", "remaining_time": "3:22:04"}
+{"current_steps": 5590, "total_steps": 35625, "loss": 0.5963, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.704104132680231e-05, "epoch": 0.7845614035087719, "percentage": 15.69, "elapsed_time": "0:37:34", "remaining_time": "3:21:54"}
+{"current_steps": 5600, "total_steps": 35625, "loss": 0.7237, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.703062284258916e-05, "epoch": 0.7859649122807018, "percentage": 15.72, "elapsed_time": "0:37:38", "remaining_time": "3:21:50"}
+{"current_steps": 5610, "total_steps": 35625, "loss": 0.6452, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.702018720678103e-05, "epoch": 0.7873684210526316, "percentage": 15.75, "elapsed_time": "0:37:42", "remaining_time": "3:21:46"}
+{"current_steps": 5620, "total_steps": 35625, "loss": 0.6291, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.7009734427502426e-05, "epoch": 0.7887719298245615, "percentage": 15.78, "elapsed_time": "0:37:47", "remaining_time": "3:21:43"}
+{"current_steps": 5630, "total_steps": 35625, "loss": 0.5925, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.699926451289119e-05, "epoch": 0.7901754385964912, "percentage": 15.8, "elapsed_time": "0:37:52", "remaining_time": "3:21:46"}
+{"current_steps": 5640, "total_steps": 35625, "loss": 0.7342, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.698877747109852e-05, "epoch": 0.791578947368421, "percentage": 15.83, "elapsed_time": "0:37:56", "remaining_time": "3:21:44"}
+{"current_steps": 5650, "total_steps": 35625, "loss": 0.644, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.697827331028893e-05, "epoch": 0.7929824561403509, "percentage": 15.86, "elapsed_time": "0:38:00", "remaining_time": "3:21:36"}
+{"current_steps": 5660, "total_steps": 35625, "loss": 0.6567, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.6967752038640264e-05, "epoch": 0.7943859649122808, "percentage": 15.89, "elapsed_time": "0:38:03", "remaining_time": "3:21:28"}
+{"current_steps": 5670, "total_steps": 35625, "loss": 0.6873, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.695721366434369e-05, "epoch": 0.7957894736842105, "percentage": 15.92, "elapsed_time": "0:38:06", "remaining_time": "3:21:22"}
+{"current_steps": 5680, "total_steps": 35625, "loss": 0.6733, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.694665819560371e-05, "epoch": 0.7971929824561403, "percentage": 15.94, "elapsed_time": "0:38:10", "remaining_time": "3:21:13"}
+{"current_steps": 5690, "total_steps": 35625, "loss": 0.642, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.693608564063811e-05, "epoch": 0.7985964912280702, "percentage": 15.97, "elapsed_time": "0:38:13", "remaining_time": "3:21:05"}
+{"current_steps": 5700, "total_steps": 35625, "loss": 0.5438, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.692549600767798e-05, "epoch": 0.8, "percentage": 16.0, "elapsed_time": "0:38:19", "remaining_time": "3:21:12"}
+{"current_steps": 5710, "total_steps": 35625, "loss": 0.6107, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.6914889304967725e-05, "epoch": 0.8014035087719298, "percentage": 16.03, "elapsed_time": "0:38:23", "remaining_time": "3:21:05"}
+{"current_steps": 5720, "total_steps": 35625, "loss": 0.5975, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.690426554076501e-05, "epoch": 0.8028070175438596, "percentage": 16.06, "elapsed_time": "0:38:27", "remaining_time": "3:21:06"}
+{"current_steps": 5730, "total_steps": 35625, "loss": 0.6563, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.689362472334082e-05, "epoch": 0.8042105263157895, "percentage": 16.08, "elapsed_time": "0:38:33", "remaining_time": "3:21:10"}
+{"current_steps": 5740, "total_steps": 35625, "loss": 0.6199, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.688296686097937e-05, "epoch": 0.8056140350877193, "percentage": 16.11, "elapsed_time": "0:38:36", "remaining_time": "3:21:00"}
+{"current_steps": 5750, "total_steps": 35625, "loss": 0.5772, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.6872291961978195e-05, "epoch": 0.8070175438596491, "percentage": 16.14, "elapsed_time": "0:38:40", "remaining_time": "3:20:54"}
+{"current_steps": 5760, "total_steps": 35625, "loss": 0.6401, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.6861600034648064e-05, "epoch": 0.8084210526315789, "percentage": 16.17, "elapsed_time": "0:38:43", "remaining_time": "3:20:47"}
+{"current_steps": 5770, "total_steps": 35625, "loss": 0.6087, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.6850891087313e-05, "epoch": 0.8098245614035088, "percentage": 16.2, "elapsed_time": "0:38:48", "remaining_time": "3:20:47"}
+{"current_steps": 5780, "total_steps": 35625, "loss": 0.6973, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.6840165128310296e-05, "epoch": 0.8112280701754386, "percentage": 16.22, "elapsed_time": "0:38:52", "remaining_time": "3:20:42"}
+{"current_steps": 5790, "total_steps": 35625, "loss": 0.6509, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.6829422165990475e-05, "epoch": 0.8126315789473684, "percentage": 16.25, "elapsed_time": "0:38:55", "remaining_time": "3:20:36"}
+{"current_steps": 5800, "total_steps": 35625, "loss": 0.6092, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.6818662208717296e-05, "epoch": 0.8140350877192982, "percentage": 16.28, "elapsed_time": "0:39:00", "remaining_time": "3:20:33"}
+{"current_steps": 5810, "total_steps": 35625, "loss": 0.5864, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.680788526486776e-05, "epoch": 0.8154385964912281, "percentage": 16.31, "elapsed_time": "0:39:04", "remaining_time": "3:20:31"}
+{"current_steps": 5820, "total_steps": 35625, "loss": 0.5736, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.679709134283209e-05, "epoch": 0.8168421052631579, "percentage": 16.34, "elapsed_time": "0:39:08", "remaining_time": "3:20:25"}
+{"current_steps": 5830, "total_steps": 35625, "loss": 0.5982, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.678628045101371e-05, "epoch": 0.8182456140350877, "percentage": 16.36, "elapsed_time": "0:39:12", "remaining_time": "3:20:20"}
+{"current_steps": 5840, "total_steps": 35625, "loss": 0.6136, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.677545259782929e-05, "epoch": 0.8196491228070175, "percentage": 16.39, "elapsed_time": "0:39:17", "remaining_time": "3:20:23"}
+{"current_steps": 5850, "total_steps": 35625, "loss": 0.6519, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.676460779170867e-05, "epoch": 0.8210526315789474, "percentage": 16.42, "elapsed_time": "0:39:21", "remaining_time": "3:20:17"}
+{"current_steps": 5860, "total_steps": 35625, "loss": 0.6122, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.675374604109491e-05, "epoch": 0.8224561403508772, "percentage": 16.45, "elapsed_time": "0:39:24", "remaining_time": "3:20:08"}
+{"current_steps": 5870, "total_steps": 35625, "loss": 0.5582, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.6742867354444256e-05, "epoch": 0.8238596491228071, "percentage": 16.48, "elapsed_time": "0:39:27", "remaining_time": "3:19:58"}
+{"current_steps": 5880, "total_steps": 35625, "loss": 0.6788, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.673197174022613e-05, "epoch": 0.8252631578947368, "percentage": 16.51, "elapsed_time": "0:39:30", "remaining_time": "3:19:52"}
+{"current_steps": 5890, "total_steps": 35625, "loss": 0.647, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.672105920692316e-05, "epoch": 0.8266666666666667, "percentage": 16.53, "elapsed_time": "0:39:35", "remaining_time": "3:19:51"}
+{"current_steps": 5900, "total_steps": 35625, "loss": 0.6326, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.6710129763031095e-05, "epoch": 0.8280701754385965, "percentage": 16.56, "elapsed_time": "0:39:38", "remaining_time": "3:19:43"}
+{"current_steps": 5910, "total_steps": 35625, "loss": 0.6205, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.669918341705891e-05, "epoch": 0.8294736842105264, "percentage": 16.59, "elapsed_time": "0:39:41", "remaining_time": "3:19:35"}
+{"current_steps": 5920, "total_steps": 35625, "loss": 0.699, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.66882201775287e-05, "epoch": 0.8308771929824561, "percentage": 16.62, "elapsed_time": "0:39:46", "remaining_time": "3:19:34"}
+{"current_steps": 5930, "total_steps": 35625, "loss": 0.6147, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.667724005297573e-05, "epoch": 0.832280701754386, "percentage": 16.65, "elapsed_time": "0:39:50", "remaining_time": "3:19:29"}
+{"current_steps": 5940, "total_steps": 35625, "loss": 0.5737, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.66662430519484e-05, "epoch": 0.8336842105263158, "percentage": 16.67, "elapsed_time": "0:39:55", "remaining_time": "3:19:29"}
+{"current_steps": 5950, "total_steps": 35625, "loss": 0.6072, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.665522918300823e-05, "epoch": 0.8350877192982457, "percentage": 16.7, "elapsed_time": "0:39:59", "remaining_time": "3:19:27"}
+{"current_steps": 5960, "total_steps": 35625, "loss": 0.6296, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.6644198454729933e-05, "epoch": 0.8364912280701754, "percentage": 16.73, "elapsed_time": "0:40:03", "remaining_time": "3:19:23"}
+{"current_steps": 5970, "total_steps": 35625, "loss": 0.6489, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.663315087570128e-05, "epoch": 0.8378947368421052, "percentage": 16.76, "elapsed_time": "0:40:06", "remaining_time": "3:19:13"}
+{"current_steps": 5980, "total_steps": 35625, "loss": 0.6742, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.662208645452321e-05, "epoch": 0.8392982456140351, "percentage": 16.79, "elapsed_time": "0:40:10", "remaining_time": "3:19:11"}
+{"current_steps": 5990, "total_steps": 35625, "loss": 0.573, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.661100519980973e-05, "epoch": 0.840701754385965, "percentage": 16.81, "elapsed_time": "0:40:13", "remaining_time": "3:19:02"}
+{"current_steps": 6000, "total_steps": 35625, "loss": 0.6455, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.6599907120188005e-05, "epoch": 0.8421052631578947, "percentage": 16.84, "elapsed_time": "0:40:16", "remaining_time": "3:18:53"}
+{"current_steps": 6000, "total_steps": 35625, "loss": null, "eval_loss": 0.6415141820907593, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": null, "epoch": 0.8421052631578947, "percentage": 16.84, "elapsed_time": "0:40:16", "remaining_time": "3:18:53"}
+{"current_steps": 6010, "total_steps": 35625, "loss": 0.5362, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.658879222429825e-05, "epoch": 0.8435087719298245, "percentage": 16.87, "elapsed_time": "0:41:05", "remaining_time": "3:22:30"}
+{"current_steps": 6020, "total_steps": 35625, "loss": 0.6321, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.65776605207938e-05, "epoch": 0.8449122807017544, "percentage": 16.9, "elapsed_time": "0:41:09", "remaining_time": "3:22:25"}
+{"current_steps": 6030, "total_steps": 35625, "loss": 0.6208, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.656651201834106e-05, "epoch": 0.8463157894736842, "percentage": 16.93, "elapsed_time": "0:41:13", "remaining_time": "3:22:19"}
+{"current_steps": 6040, "total_steps": 35625, "loss": 0.6529, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.655534672561953e-05, "epoch": 0.847719298245614, "percentage": 16.95, "elapsed_time": "0:41:16", "remaining_time": "3:22:09"}
+{"current_steps": 6050, "total_steps": 35625, "loss": 0.6515, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.654416465132177e-05, "epoch": 0.8491228070175438, "percentage": 16.98, "elapsed_time": "0:41:20", "remaining_time": "3:22:03"}
+{"current_steps": 6060, "total_steps": 35625, "loss": 0.613, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.6532965804153416e-05, "epoch": 0.8505263157894737, "percentage": 17.01, "elapsed_time": "0:41:23", "remaining_time": "3:21:57"}
+{"current_steps": 6070, "total_steps": 35625, "loss": 0.6215, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.652175019283314e-05, "epoch": 0.8519298245614035, "percentage": 17.04, "elapsed_time": "0:41:27", "remaining_time": "3:21:50"}
+{"current_steps": 6080, "total_steps": 35625, "loss": 0.7427, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.6510517826092695e-05, "epoch": 0.8533333333333334, "percentage": 17.07, "elapsed_time": "0:41:30", "remaining_time": "3:21:42"}
+{"current_steps": 6090, "total_steps": 35625, "loss": 0.58, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.649926871267685e-05, "epoch": 0.8547368421052631, "percentage": 17.09, "elapsed_time": "0:41:34", "remaining_time": "3:21:38"}
+{"current_steps": 6100, "total_steps": 35625, "loss": 0.6916, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.6488002861343425e-05, "epoch": 0.856140350877193, "percentage": 17.12, "elapsed_time": "0:41:38", "remaining_time": "3:21:33"}
+{"current_steps": 6110, "total_steps": 35625, "loss": 0.6073, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.647672028086328e-05, "epoch": 0.8575438596491228, "percentage": 17.15, "elapsed_time": "0:41:42", "remaining_time": "3:21:28"}
+{"current_steps": 6120, "total_steps": 35625, "loss": 0.6273, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.646542098002029e-05, "epoch": 0.8589473684210527, "percentage": 17.18, "elapsed_time": "0:41:45", "remaining_time": "3:21:20"}
+{"current_steps": 6130, "total_steps": 35625, "loss": 0.6657, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.645410496761135e-05, "epoch": 0.8603508771929824, "percentage": 17.21, "elapsed_time": "0:41:49", "remaining_time": "3:21:13"}
+{"current_steps": 6140, "total_steps": 35625, "loss": 0.6861, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.644277225244635e-05, "epoch": 0.8617543859649123, "percentage": 17.24, "elapsed_time": "0:41:52", "remaining_time": "3:21:04"}
+{"current_steps": 6150, "total_steps": 35625, "loss": 0.6834, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.6431422843348216e-05, "epoch": 0.8631578947368421, "percentage": 17.26, "elapsed_time": "0:41:55", "remaining_time": "3:20:57"}
+{"current_steps": 6160, "total_steps": 35625, "loss": 0.6098, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.642005674915284e-05, "epoch": 0.864561403508772, "percentage": 17.29, "elapsed_time": "0:42:00", "remaining_time": "3:20:54"}
+{"current_steps": 6170, "total_steps": 35625, "loss": 0.6831, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.640867397870912e-05, "epoch": 0.8659649122807017, "percentage": 17.32, "elapsed_time": "0:42:03", "remaining_time": "3:20:49"}
+{"current_steps": 6180, "total_steps": 35625, "loss": 0.5846, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.639727454087892e-05, "epoch": 0.8673684210526316, "percentage": 17.35, "elapsed_time": "0:42:06", "remaining_time": "3:20:39"}
+{"current_steps": 6190, "total_steps": 35625, "loss": 0.6436, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.638585844453711e-05, "epoch": 0.8687719298245614, "percentage": 17.38, "elapsed_time": "0:42:11", "remaining_time": "3:20:40"}
+{"current_steps": 6200, "total_steps": 35625, "loss": 0.7538, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.6374425698571514e-05, "epoch": 0.8701754385964913, "percentage": 17.4, "elapsed_time": "0:42:15", "remaining_time": "3:20:32"}
+{"current_steps": 6210, "total_steps": 35625, "loss": 0.596, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.63629763118829e-05, "epoch": 0.871578947368421, "percentage": 17.43, "elapsed_time": "0:42:19", "remaining_time": "3:20:29"}
+{"current_steps": 6220, "total_steps": 35625, "loss": 0.5844, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.6351510293385026e-05, "epoch": 0.8729824561403509, "percentage": 17.46, "elapsed_time": "0:42:23", "remaining_time": "3:20:25"}
+{"current_steps": 6230, "total_steps": 35625, "loss": 0.5785, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.634002765200456e-05, "epoch": 0.8743859649122807, "percentage": 17.49, "elapsed_time": "0:42:26", "remaining_time": "3:20:17"}
+{"current_steps": 6240, "total_steps": 35625, "loss": 0.5728, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.632852839668115e-05, "epoch": 0.8757894736842106, "percentage": 17.52, "elapsed_time": "0:42:30", "remaining_time": "3:20:09"}
+{"current_steps": 6250, "total_steps": 35625, "loss": 0.6317, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.6317012536367354e-05, "epoch": 0.8771929824561403, "percentage": 17.54, "elapsed_time": "0:42:33", "remaining_time": "3:20:02"}
+{"current_steps": 6260, "total_steps": 35625, "loss": 0.6152, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.630548008002866e-05, "epoch": 0.8785964912280702, "percentage": 17.57, "elapsed_time": "0:42:38", "remaining_time": "3:20:02"}
+{"current_steps": 6270, "total_steps": 35625, "loss": 0.64, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.629393103664349e-05, "epoch": 0.88, "percentage": 17.6, "elapsed_time": "0:42:41", "remaining_time": "3:19:54"}
+{"current_steps": 6280, "total_steps": 35625, "loss": 0.5923, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.6282365415203164e-05, "epoch": 0.8814035087719299, "percentage": 17.63, "elapsed_time": "0:42:45", "remaining_time": "3:19:49"}
+{"current_steps": 6290, "total_steps": 35625, "loss": 0.6745, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.627078322471191e-05, "epoch": 0.8828070175438596, "percentage": 17.66, "elapsed_time": "0:42:49", "remaining_time": "3:19:42"}
+{"current_steps": 6300, "total_steps": 35625, "loss": 0.5819, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.625918447418687e-05, "epoch": 0.8842105263157894, "percentage": 17.68, "elapsed_time": "0:42:53", "remaining_time": "3:19:39"}
+{"current_steps": 6310, "total_steps": 35625, "loss": 0.5965, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.624756917265807e-05, "epoch": 0.8856140350877193, "percentage": 17.71, "elapsed_time": "0:42:59", "remaining_time": "3:19:41"}
+{"current_steps": 6320, "total_steps": 35625, "loss": 0.5838, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.62359373291684e-05, "epoch": 0.8870175438596491, "percentage": 17.74, "elapsed_time": "0:43:03", "remaining_time": "3:19:40"}
+{"current_steps": 6330, "total_steps": 35625, "loss": 0.7304, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.622428895277367e-05, "epoch": 0.888421052631579, "percentage": 17.77, "elapsed_time": "0:43:08", "remaining_time": "3:19:38"}
+{"current_steps": 6340, "total_steps": 35625, "loss": 0.5938, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.621262405254253e-05, "epoch": 0.8898245614035087, "percentage": 17.8, "elapsed_time": "0:43:11", "remaining_time": "3:19:29"}
+{"current_steps": 6350, "total_steps": 35625, "loss": 0.6276, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.620094263755652e-05, "epoch": 0.8912280701754386, "percentage": 17.82, "elapsed_time": "0:43:15", "remaining_time": "3:19:25"}
+{"current_steps": 6360, "total_steps": 35625, "loss": 0.613, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.618924471691e-05, "epoch": 0.8926315789473684, "percentage": 17.85, "elapsed_time": "0:43:19", "remaining_time": "3:19:22"}
+{"current_steps": 6370, "total_steps": 35625, "loss": 0.599, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.617753029971021e-05, "epoch": 0.8940350877192983, "percentage": 17.88, "elapsed_time": "0:43:23", "remaining_time": "3:19:17"}
+{"current_steps": 6380, "total_steps": 35625, "loss": 0.6358, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.6165799395077236e-05, "epoch": 0.895438596491228, "percentage": 17.91, "elapsed_time": "0:43:27", "remaining_time": "3:19:14"}
+{"current_steps": 6390, "total_steps": 35625, "loss": 0.6747, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.615405201214398e-05, "epoch": 0.8968421052631579, "percentage": 17.94, "elapsed_time": "0:43:31", "remaining_time": "3:19:06"}
+{"current_steps": 6400, "total_steps": 35625, "loss": 0.6082, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.614228816005618e-05, "epoch": 0.8982456140350877, "percentage": 17.96, "elapsed_time": "0:43:34", "remaining_time": "3:18:59"}
+{"current_steps": 6410, "total_steps": 35625, "loss": 0.5506, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.61305078479724e-05, "epoch": 0.8996491228070176, "percentage": 17.99, "elapsed_time": "0:43:39", "remaining_time": "3:18:57"}
+{"current_steps": 6420, "total_steps": 35625, "loss": 0.5816, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.611871108506403e-05, "epoch": 0.9010526315789473, "percentage": 18.02, "elapsed_time": "0:43:43", "remaining_time": "3:18:54"}
+{"current_steps": 6430, "total_steps": 35625, "loss": 0.6178, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.610689788051523e-05, "epoch": 0.9024561403508772, "percentage": 18.05, "elapsed_time": "0:43:47", "remaining_time": "3:18:49"}
+{"current_steps": 6440, "total_steps": 35625, "loss": 0.644, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.6095068243523e-05, "epoch": 0.903859649122807, "percentage": 18.08, "elapsed_time": "0:43:50", "remaining_time": "3:18:41"}
+{"current_steps": 6450, "total_steps": 35625, "loss": 0.5564, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.608322218329711e-05, "epoch": 0.9052631578947369, "percentage": 18.11, "elapsed_time": "0:43:55", "remaining_time": "3:18:41"}
+{"current_steps": 6460, "total_steps": 35625, "loss": 0.6534, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.607135970906014e-05, "epoch": 0.9066666666666666, "percentage": 18.13, "elapsed_time": "0:43:59", "remaining_time": "3:18:36"}
+{"current_steps": 6470, "total_steps": 35625, "loss": 0.6671, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.605948083004741e-05, "epoch": 0.9080701754385965, "percentage": 18.16, "elapsed_time": "0:44:03", "remaining_time": "3:18:30"}
+{"current_steps": 6480, "total_steps": 35625, "loss": 0.6996, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.6047585555507045e-05, "epoch": 0.9094736842105263, "percentage": 18.19, "elapsed_time": "0:44:07", "remaining_time": "3:18:27"}
+{"current_steps": 6490, "total_steps": 35625, "loss": 0.5937, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.603567389469993e-05, "epoch": 0.9108771929824562, "percentage": 18.22, "elapsed_time": "0:44:10", "remaining_time": "3:18:20"}
+{"current_steps": 6500, "total_steps": 35625, "loss": 0.555, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.60237458568997e-05, "epoch": 0.9122807017543859, "percentage": 18.25, "elapsed_time": "0:44:14", "remaining_time": "3:18:12"}
+{"current_steps": 6510, "total_steps": 35625, "loss": 0.6721, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.6011801451392736e-05, "epoch": 0.9136842105263158, "percentage": 18.27, "elapsed_time": "0:44:17", "remaining_time": "3:18:05"}
+{"current_steps": 6520, "total_steps": 35625, "loss": 0.5849, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.5999840687478167e-05, "epoch": 0.9150877192982456, "percentage": 18.3, "elapsed_time": "0:44:21", "remaining_time": "3:18:01"}
+{"current_steps": 6530, "total_steps": 35625, "loss": 0.6013, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.598786357446786e-05, "epoch": 0.9164912280701755, "percentage": 18.33, "elapsed_time": "0:44:24", "remaining_time": "3:17:52"}
+{"current_steps": 6540, "total_steps": 35625, "loss": 0.6178, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.5975870121686406e-05, "epoch": 0.9178947368421052, "percentage": 18.36, "elapsed_time": "0:44:28", "remaining_time": "3:17:47"}
+{"current_steps": 6550, "total_steps": 35625, "loss": 0.5985, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.596386033847111e-05, "epoch": 0.9192982456140351, "percentage": 18.39, "elapsed_time": "0:44:33", "remaining_time": "3:17:48"}
+{"current_steps": 6560, "total_steps": 35625, "loss": 0.5878, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.5951834234172025e-05, "epoch": 0.9207017543859649, "percentage": 18.41, "elapsed_time": "0:44:38", "remaining_time": "3:17:47"}
+{"current_steps": 6570, "total_steps": 35625, "loss": 0.7004, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.593979181815187e-05, "epoch": 0.9221052631578948, "percentage": 18.44, "elapsed_time": "0:44:42", "remaining_time": "3:17:43"}
+{"current_steps": 6580, "total_steps": 35625, "loss": 0.615, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.5927733099786066e-05, "epoch": 0.9235087719298246, "percentage": 18.47, "elapsed_time": "0:44:45", "remaining_time": "3:17:34"}
+{"current_steps": 6590, "total_steps": 35625, "loss": 0.5789, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.591565808846276e-05, "epoch": 0.9249122807017544, "percentage": 18.5, "elapsed_time": "0:44:49", "remaining_time": "3:17:30"}
+{"current_steps": 6600, "total_steps": 35625, "loss": 0.5827, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.5903566793582755e-05, "epoch": 0.9263157894736842, "percentage": 18.53, "elapsed_time": "0:44:52", "remaining_time": "3:17:22"}
+{"current_steps": 6610, "total_steps": 35625, "loss": 0.6176, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.589145922455954e-05, "epoch": 0.927719298245614, "percentage": 18.55, "elapsed_time": "0:44:56", "remaining_time": "3:17:16"}
+{"current_steps": 6620, "total_steps": 35625, "loss": 0.7056, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.587933539081927e-05, "epoch": 0.9291228070175439, "percentage": 18.58, "elapsed_time": "0:44:59", "remaining_time": "3:17:07"}
+{"current_steps": 6630, "total_steps": 35625, "loss": 0.6418, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.586719530180075e-05, "epoch": 0.9305263157894736, "percentage": 18.61, "elapsed_time": "0:45:02", "remaining_time": "3:16:59"}
+{"current_steps": 6640, "total_steps": 35625, "loss": 0.5231, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.585503896695549e-05, "epoch": 0.9319298245614035, "percentage": 18.64, "elapsed_time": "0:45:05", "remaining_time": "3:16:49"}
+{"current_steps": 6650, "total_steps": 35625, "loss": 0.7463, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.584286639574758e-05, "epoch": 0.9333333333333333, "percentage": 18.67, "elapsed_time": "0:45:08", "remaining_time": "3:16:41"}
+{"current_steps": 6660, "total_steps": 35625, "loss": 0.6373, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.58306775976538e-05, "epoch": 0.9347368421052632, "percentage": 18.69, "elapsed_time": "0:45:12", "remaining_time": "3:16:34"}
+{"current_steps": 6670, "total_steps": 35625, "loss": 0.6609, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.581847258216355e-05, "epoch": 0.9361403508771929, "percentage": 18.72, "elapsed_time": "0:45:15", "remaining_time": "3:16:26"}
+{"current_steps": 6680, "total_steps": 35625, "loss": 0.6366, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.580625135877884e-05, "epoch": 0.9375438596491228, "percentage": 18.75, "elapsed_time": "0:45:18", "remaining_time": "3:16:17"}
+{"current_steps": 6690, "total_steps": 35625, "loss": 0.6359, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.5794013937014326e-05, "epoch": 0.9389473684210526, "percentage": 18.78, "elapsed_time": "0:45:23", "remaining_time": "3:16:20"}
+{"current_steps": 6700, "total_steps": 35625, "loss": 0.7239, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.578176032639724e-05, "epoch": 0.9403508771929825, "percentage": 18.81, "elapsed_time": "0:45:27", "remaining_time": "3:16:15"}
+{"current_steps": 6710, "total_steps": 35625, "loss": 0.5848, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.5769490536467465e-05, "epoch": 0.9417543859649122, "percentage": 18.84, "elapsed_time": "0:45:32", "remaining_time": "3:16:13"}
+{"current_steps": 6720, "total_steps": 35625, "loss": 0.6155, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.5757204576777437e-05, "epoch": 0.9431578947368421, "percentage": 18.86, "elapsed_time": "0:45:36", "remaining_time": "3:16:11"}
+{"current_steps": 6730, "total_steps": 35625, "loss": 0.5817, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.574490245689219e-05, "epoch": 0.9445614035087719, "percentage": 18.89, "elapsed_time": "0:45:39", "remaining_time": "3:16:02"}
+{"current_steps": 6740, "total_steps": 35625, "loss": 0.4903, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.573258418638936e-05, "epoch": 0.9459649122807018, "percentage": 18.92, "elapsed_time": "0:45:43", "remaining_time": "3:15:56"}
+{"current_steps": 6750, "total_steps": 35625, "loss": 0.6289, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.572024977485914e-05, "epoch": 0.9473684210526315, "percentage": 18.95, "elapsed_time": "0:45:47", "remaining_time": "3:15:51"}
+{"current_steps": 6760, "total_steps": 35625, "loss": 0.6133, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.5707899231904286e-05, "epoch": 0.9487719298245614, "percentage": 18.98, "elapsed_time": "0:45:51", "remaining_time": "3:15:50"}
+{"current_steps": 6770, "total_steps": 35625, "loss": 0.638, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.569553256714012e-05, "epoch": 0.9501754385964912, "percentage": 19.0, "elapsed_time": "0:45:56", "remaining_time": "3:15:48"}
+{"current_steps": 6780, "total_steps": 35625, "loss": 0.7599, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.5683149790194526e-05, "epoch": 0.9515789473684211, "percentage": 19.03, "elapsed_time": "0:46:00", "remaining_time": "3:15:43"}
+{"current_steps": 6790, "total_steps": 35625, "loss": 0.6906, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.5670750910707903e-05, "epoch": 0.9529824561403509, "percentage": 19.06, "elapsed_time": "0:46:03", "remaining_time": "3:15:35"}
+{"current_steps": 6800, "total_steps": 35625, "loss": 0.7387, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.565833593833321e-05, "epoch": 0.9543859649122807, "percentage": 19.09, "elapsed_time": "0:46:07", "remaining_time": "3:15:30"}
+{"current_steps": 6810, "total_steps": 35625, "loss": 0.5566, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.5645904882735935e-05, "epoch": 0.9557894736842105, "percentage": 19.12, "elapsed_time": "0:46:10", "remaining_time": "3:15:23"}
+{"current_steps": 6820, "total_steps": 35625, "loss": 0.5748, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.563345775359408e-05, "epoch": 0.9571929824561404, "percentage": 19.14, "elapsed_time": "0:46:13", "remaining_time": "3:15:15"}
+{"current_steps": 6830, "total_steps": 35625, "loss": 0.6256, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.562099456059815e-05, "epoch": 0.9585964912280702, "percentage": 19.17, "elapsed_time": "0:46:17", "remaining_time": "3:15:09"}
+{"current_steps": 6840, "total_steps": 35625, "loss": 0.5826, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.5608515313451186e-05, "epoch": 0.96, "percentage": 19.2, "elapsed_time": "0:46:21", "remaining_time": "3:15:05"}
+{"current_steps": 6850, "total_steps": 35625, "loss": 0.5538, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.559602002186869e-05, "epoch": 0.9614035087719298, "percentage": 19.23, "elapsed_time": "0:46:24", "remaining_time": "3:14:58"}
+{"current_steps": 6860, "total_steps": 35625, "loss": 0.6514, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.558350869557868e-05, "epoch": 0.9628070175438597, "percentage": 19.26, "elapsed_time": "0:46:27", "remaining_time": "3:14:49"}
+{"current_steps": 6870, "total_steps": 35625, "loss": 0.7813, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.557098134432167e-05, "epoch": 0.9642105263157895, "percentage": 19.28, "elapsed_time": "0:46:31", "remaining_time": "3:14:45"}
+{"current_steps": 6880, "total_steps": 35625, "loss": 0.5993, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.555843797785061e-05, "epoch": 0.9656140350877193, "percentage": 19.31, "elapsed_time": "0:46:34", "remaining_time": "3:14:36"}
+{"current_steps": 6890, "total_steps": 35625, "loss": 0.5594, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.554587860593095e-05, "epoch": 0.9670175438596491, "percentage": 19.34, "elapsed_time": "0:46:37", "remaining_time": "3:14:27"}
+{"current_steps": 6900, "total_steps": 35625, "loss": 0.5581, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.553330323834059e-05, "epoch": 0.968421052631579, "percentage": 19.37, "elapsed_time": "0:46:41", "remaining_time": "3:14:23"}
+{"current_steps": 6910, "total_steps": 35625, "loss": 0.6295, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.552071188486989e-05, "epoch": 0.9698245614035088, "percentage": 19.4, "elapsed_time": "0:46:45", "remaining_time": "3:14:20"}
+{"current_steps": 6920, "total_steps": 35625, "loss": 0.5697, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.550810455532164e-05, "epoch": 0.9712280701754386, "percentage": 19.42, "elapsed_time": "0:46:49", "remaining_time": "3:14:14"}
+{"current_steps": 6930, "total_steps": 35625, "loss": 0.5933, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.5495481259511095e-05, "epoch": 0.9726315789473684, "percentage": 19.45, "elapsed_time": "0:46:52", "remaining_time": "3:14:05"}
+{"current_steps": 6940, "total_steps": 35625, "loss": 0.5723, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.54828420072659e-05, "epoch": 0.9740350877192983, "percentage": 19.48, "elapsed_time": "0:46:55", "remaining_time": "3:13:57"}
+{"current_steps": 6950, "total_steps": 35625, "loss": 0.5749, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.547018680842616e-05, "epoch": 0.9754385964912281, "percentage": 19.51, "elapsed_time": "0:46:59", "remaining_time": "3:13:50"}
+{"current_steps": 6960, "total_steps": 35625, "loss": 0.7053, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.545751567284439e-05, "epoch": 0.9768421052631578, "percentage": 19.54, "elapsed_time": "0:47:02", "remaining_time": "3:13:46"}
+{"current_steps": 6970, "total_steps": 35625, "loss": 0.629, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.5444828610385486e-05, "epoch": 0.9782456140350877, "percentage": 19.56, "elapsed_time": "0:47:06", "remaining_time": "3:13:40"}
+{"current_steps": 6980, "total_steps": 35625, "loss": 0.6647, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.543212563092677e-05, "epoch": 0.9796491228070175, "percentage": 19.59, "elapsed_time": "0:47:09", "remaining_time": "3:13:33"}
+{"current_steps": 6990, "total_steps": 35625, "loss": 0.6921, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.541940674435794e-05, "epoch": 0.9810526315789474, "percentage": 19.62, "elapsed_time": "0:47:14", "remaining_time": "3:13:33"}
+{"current_steps": 7000, "total_steps": 35625, "loss": 0.6187, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.5406671960581096e-05, "epoch": 0.9824561403508771, "percentage": 19.65, "elapsed_time": "0:47:18", "remaining_time": "3:13:26"}
+{"current_steps": 7010, "total_steps": 35625, "loss": 0.6471, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.53939212895107e-05, "epoch": 0.983859649122807, "percentage": 19.68, "elapsed_time": "0:47:23", "remaining_time": "3:13:25"}
+{"current_steps": 7020, "total_steps": 35625, "loss": 0.5916, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.538115474107357e-05, "epoch": 0.9852631578947368, "percentage": 19.71, "elapsed_time": "0:47:27", "remaining_time": "3:13:21"}
+{"current_steps": 7030, "total_steps": 35625, "loss": 0.6859, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.536837232520893e-05, "epoch": 0.9866666666666667, "percentage": 19.73, "elapsed_time": "0:47:30", "remaining_time": "3:13:13"}
+{"current_steps": 7040, "total_steps": 35625, "loss": 0.64, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.535557405186831e-05, "epoch": 0.9880701754385965, "percentage": 19.76, "elapsed_time": "0:47:33", "remaining_time": "3:13:05"}
+{"current_steps": 7050, "total_steps": 35625, "loss": 0.5839, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.534275993101561e-05, "epoch": 0.9894736842105263, "percentage": 19.79, "elapsed_time": "0:47:36", "remaining_time": "3:12:59"}
+{"current_steps": 7060, "total_steps": 35625, "loss": 0.577, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.532992997262706e-05, "epoch": 0.9908771929824561, "percentage": 19.82, "elapsed_time": "0:47:41", "remaining_time": "3:12:59"}
+{"current_steps": 7070, "total_steps": 35625, "loss": 0.7155, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.531708418669122e-05, "epoch": 0.992280701754386, "percentage": 19.85, "elapsed_time": "0:47:45", "remaining_time": "3:12:55"}
+{"current_steps": 7080, "total_steps": 35625, "loss": 0.6713, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.5304222583208983e-05, "epoch": 0.9936842105263158, "percentage": 19.87, "elapsed_time": "0:47:50", "remaining_time": "3:12:51"}
+{"current_steps": 7090, "total_steps": 35625, "loss": 0.6528, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.5291345172193546e-05, "epoch": 0.9950877192982456, "percentage": 19.9, "elapsed_time": "0:47:55", "remaining_time": "3:12:51"}
+{"current_steps": 7100, "total_steps": 35625, "loss": 0.5705, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.5278451963670403e-05, "epoch": 0.9964912280701754, "percentage": 19.93, "elapsed_time": "0:47:58", "remaining_time": "3:12:45"}
+{"current_steps": 7110, "total_steps": 35625, "loss": 0.6763, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.526554296767738e-05, "epoch": 0.9978947368421053, "percentage": 19.96, "elapsed_time": "0:48:02", "remaining_time": "3:12:40"}
+{"current_steps": 7120, "total_steps": 35625, "loss": 0.5855, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.525261819426455e-05, "epoch": 0.9992982456140351, "percentage": 19.99, "elapsed_time": "0:48:05", "remaining_time": "3:12:33"}
+{"current_steps": 7130, "total_steps": 35625, "loss": 0.5631, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.5239677653494305e-05, "epoch": 1.0007017543859649, "percentage": 20.01, "elapsed_time": "0:48:10", "remaining_time": "3:12:31"}
+{"current_steps": 7140, "total_steps": 35625, "loss": 0.493, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.5226721355441306e-05, "epoch": 1.0021052631578948, "percentage": 20.04, "elapsed_time": "0:48:13", "remaining_time": "3:12:25"}
+{"current_steps": 7150, "total_steps": 35625, "loss": 0.5049, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.5213749310192455e-05, "epoch": 1.0035087719298246, "percentage": 20.07, "elapsed_time": "0:48:16", "remaining_time": "3:12:17"}
+{"current_steps": 7160, "total_steps": 35625, "loss": 0.5204, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.520076152784695e-05, "epoch": 1.0049122807017543, "percentage": 20.1, "elapsed_time": "0:48:20", "remaining_time": "3:12:09"}
+{"current_steps": 7170, "total_steps": 35625, "loss": 0.5395, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.518775801851622e-05, "epoch": 1.0063157894736843, "percentage": 20.13, "elapsed_time": "0:48:23", "remaining_time": "3:12:02"}
+{"current_steps": 7180, "total_steps": 35625, "loss": 0.5231, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.517473879232395e-05, "epoch": 1.007719298245614, "percentage": 20.15, "elapsed_time": "0:48:28", "remaining_time": "3:12:02"}
+{"current_steps": 7190, "total_steps": 35625, "loss": 0.5764, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.516170385940603e-05, "epoch": 1.0091228070175438, "percentage": 20.18, "elapsed_time": "0:48:32", "remaining_time": "3:11:59"}
+{"current_steps": 7200, "total_steps": 35625, "loss": 0.5339, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.514865322991063e-05, "epoch": 1.0105263157894737, "percentage": 20.21, "elapsed_time": "0:48:36", "remaining_time": "3:11:52"}
+{"current_steps": 7210, "total_steps": 35625, "loss": 0.5684, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.51355869139981e-05, "epoch": 1.0119298245614035, "percentage": 20.24, "elapsed_time": "0:48:39", "remaining_time": "3:11:44"}
+{"current_steps": 7220, "total_steps": 35625, "loss": 0.539, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.512250492184101e-05, "epoch": 1.0133333333333334, "percentage": 20.27, "elapsed_time": "0:48:42", "remaining_time": "3:11:38"}
+{"current_steps": 7230, "total_steps": 35625, "loss": 0.5348, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.510940726362416e-05, "epoch": 1.0147368421052632, "percentage": 20.29, "elapsed_time": "0:48:46", "remaining_time": "3:11:33"}
+{"current_steps": 7240, "total_steps": 35625, "loss": 0.5795, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.50962939495445e-05, "epoch": 1.016140350877193, "percentage": 20.32, "elapsed_time": "0:48:49", "remaining_time": "3:11:26"}
+{"current_steps": 7250, "total_steps": 35625, "loss": 0.5894, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.50831649898112e-05, "epoch": 1.0175438596491229, "percentage": 20.35, "elapsed_time": "0:48:52", "remaining_time": "3:11:18"}
+{"current_steps": 7260, "total_steps": 35625, "loss": 0.5825, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.507002039464562e-05, "epoch": 1.0189473684210526, "percentage": 20.38, "elapsed_time": "0:48:55", "remaining_time": "3:11:10"}
+{"current_steps": 7270, "total_steps": 35625, "loss": 0.5513, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.505686017428127e-05, "epoch": 1.0203508771929826, "percentage": 20.41, "elapsed_time": "0:48:59", "remaining_time": "3:11:03"}
+{"current_steps": 7280, "total_steps": 35625, "loss": 0.6675, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.504368433896382e-05, "epoch": 1.0217543859649123, "percentage": 20.44, "elapsed_time": "0:49:03", "remaining_time": "3:11:00"}
+{"current_steps": 7290, "total_steps": 35625, "loss": 0.5146, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.5030492898951134e-05, "epoch": 1.023157894736842, "percentage": 20.46, "elapsed_time": "0:49:06", "remaining_time": "3:10:51"}
+{"current_steps": 7300, "total_steps": 35625, "loss": 0.6254, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.501728586451318e-05, "epoch": 1.024561403508772, "percentage": 20.49, "elapsed_time": "0:49:09", "remaining_time": "3:10:45"}
+{"current_steps": 7310, "total_steps": 35625, "loss": 0.4688, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.5004063245932097e-05, "epoch": 1.0259649122807017, "percentage": 20.52, "elapsed_time": "0:49:13", "remaining_time": "3:10:40"}
+{"current_steps": 7320, "total_steps": 35625, "loss": 0.5227, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.4990825053502136e-05, "epoch": 1.0273684210526315, "percentage": 20.55, "elapsed_time": "0:49:18", "remaining_time": "3:10:38"}
+{"current_steps": 7330, "total_steps": 35625, "loss": 0.5219, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.497757129752969e-05, "epoch": 1.0287719298245614, "percentage": 20.58, "elapsed_time": "0:49:22", "remaining_time": "3:10:36"}
+{"current_steps": 7340, "total_steps": 35625, "loss": 0.5006, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.496430198833327e-05, "epoch": 1.0301754385964912, "percentage": 20.6, "elapsed_time": "0:49:27", "remaining_time": "3:10:33"}
+{"current_steps": 7350, "total_steps": 35625, "loss": 0.519, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.495101713624348e-05, "epoch": 1.0315789473684212, "percentage": 20.63, "elapsed_time": "0:49:30", "remaining_time": "3:10:26"}
+{"current_steps": 7360, "total_steps": 35625, "loss": 0.6042, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.493771675160303e-05, "epoch": 1.032982456140351, "percentage": 20.66, "elapsed_time": "0:49:34", "remaining_time": "3:10:22"}
+{"current_steps": 7370, "total_steps": 35625, "loss": 0.5092, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.4924400844766734e-05, "epoch": 1.0343859649122806, "percentage": 20.69, "elapsed_time": "0:49:37", "remaining_time": "3:10:14"}
+{"current_steps": 7380, "total_steps": 35625, "loss": 0.6178, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.491106942610147e-05, "epoch": 1.0357894736842106, "percentage": 20.72, "elapsed_time": "0:49:43", "remaining_time": "3:10:18"}
+{"current_steps": 7390, "total_steps": 35625, "loss": 0.6226, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.489772250598622e-05, "epoch": 1.0371929824561403, "percentage": 20.74, "elapsed_time": "0:49:47", "remaining_time": "3:10:13"}
+{"current_steps": 7400, "total_steps": 35625, "loss": 0.5821, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.488436009481201e-05, "epoch": 1.03859649122807, "percentage": 20.77, "elapsed_time": "0:49:50", "remaining_time": "3:10:06"}
+{"current_steps": 7410, "total_steps": 35625, "loss": 0.5265, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.487098220298193e-05, "epoch": 1.04, "percentage": 20.8, "elapsed_time": "0:49:53", "remaining_time": "3:09:59"}
+{"current_steps": 7420, "total_steps": 35625, "loss": 0.5617, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.485758884091113e-05, "epoch": 1.0414035087719298, "percentage": 20.83, "elapsed_time": "0:49:58", "remaining_time": "3:09:58"}
+{"current_steps": 7430, "total_steps": 35625, "loss": 0.5468, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.4844180019026805e-05, "epoch": 1.0428070175438597, "percentage": 20.86, "elapsed_time": "0:50:02", "remaining_time": "3:09:52"}
+{"current_steps": 7440, "total_steps": 35625, "loss": 0.5048, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.483075574776819e-05, "epoch": 1.0442105263157895, "percentage": 20.88, "elapsed_time": "0:50:05", "remaining_time": "3:09:44"}
+{"current_steps": 7450, "total_steps": 35625, "loss": 0.5684, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.4817316037586524e-05, "epoch": 1.0456140350877192, "percentage": 20.91, "elapsed_time": "0:50:08", "remaining_time": "3:09:36"}
+{"current_steps": 7460, "total_steps": 35625, "loss": 0.5851, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.480386089894509e-05, "epoch": 1.0470175438596492, "percentage": 20.94, "elapsed_time": "0:50:11", "remaining_time": "3:09:28"}
+{"current_steps": 7470, "total_steps": 35625, "loss": 0.5308, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.479039034231918e-05, "epoch": 1.048421052631579, "percentage": 20.97, "elapsed_time": "0:50:16", "remaining_time": "3:09:28"}
+{"current_steps": 7480, "total_steps": 35625, "loss": 0.5904, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.477690437819607e-05, "epoch": 1.0498245614035087, "percentage": 21.0, "elapsed_time": "0:50:19", "remaining_time": "3:09:22"}
+{"current_steps": 7490, "total_steps": 35625, "loss": 0.4894, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.476340301707507e-05, "epoch": 1.0512280701754386, "percentage": 21.02, "elapsed_time": "0:50:24", "remaining_time": "3:09:20"}
+{"current_steps": 7500, "total_steps": 35625, "loss": 0.4906, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.4749886269467416e-05, "epoch": 1.0526315789473684, "percentage": 21.05, "elapsed_time": "0:50:29", "remaining_time": "3:09:21"}
+{"current_steps": 7510, "total_steps": 35625, "loss": 0.5399, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.473635414589639e-05, "epoch": 1.0540350877192983, "percentage": 21.08, "elapsed_time": "0:50:32", "remaining_time": "3:09:14"}
+{"current_steps": 7520, "total_steps": 35625, "loss": 0.5168, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.47228066568972e-05, "epoch": 1.055438596491228, "percentage": 21.11, "elapsed_time": "0:50:36", "remaining_time": "3:09:07"}
+{"current_steps": 7530, "total_steps": 35625, "loss": 0.4888, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.470924381301704e-05, "epoch": 1.0568421052631578, "percentage": 21.14, "elapsed_time": "0:50:40", "remaining_time": "3:09:04"}
+{"current_steps": 7540, "total_steps": 35625, "loss": 0.4909, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.469566562481503e-05, "epoch": 1.0582456140350878, "percentage": 21.16, "elapsed_time": "0:50:43", "remaining_time": "3:08:57"}
+{"current_steps": 7550, "total_steps": 35625, "loss": 0.5369, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.4682072102862286e-05, "epoch": 1.0596491228070175, "percentage": 21.19, "elapsed_time": "0:50:46", "remaining_time": "3:08:50"}
+{"current_steps": 7560, "total_steps": 35625, "loss": 0.5046, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.466846325774179e-05, "epoch": 1.0610526315789475, "percentage": 21.22, "elapsed_time": "0:50:51", "remaining_time": "3:08:48"}
+{"current_steps": 7570, "total_steps": 35625, "loss": 0.5201, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.4654839100048535e-05, "epoch": 1.0624561403508772, "percentage": 21.25, "elapsed_time": "0:50:56", "remaining_time": "3:08:45"}
+{"current_steps": 7580, "total_steps": 35625, "loss": 0.5238, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.464119964038937e-05, "epoch": 1.063859649122807, "percentage": 21.28, "elapsed_time": "0:50:59", "remaining_time": "3:08:39"}
+{"current_steps": 7590, "total_steps": 35625, "loss": 0.5074, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.462754488938309e-05, "epoch": 1.065263157894737, "percentage": 21.31, "elapsed_time": "0:51:03", "remaining_time": "3:08:33"}
+{"current_steps": 7600, "total_steps": 35625, "loss": 0.5297, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.4613874857660384e-05, "epoch": 1.0666666666666667, "percentage": 21.33, "elapsed_time": "0:51:06", "remaining_time": "3:08:27"}
+{"current_steps": 7610, "total_steps": 35625, "loss": 0.5585, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.460018955586384e-05, "epoch": 1.0680701754385964, "percentage": 21.36, "elapsed_time": "0:51:11", "remaining_time": "3:08:28"}
+{"current_steps": 7620, "total_steps": 35625, "loss": 0.4944, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.458648899464793e-05, "epoch": 1.0694736842105264, "percentage": 21.39, "elapsed_time": "0:51:16", "remaining_time": "3:08:27"}
+{"current_steps": 7630, "total_steps": 35625, "loss": 0.5736, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.457277318467903e-05, "epoch": 1.070877192982456, "percentage": 21.42, "elapsed_time": "0:51:21", "remaining_time": "3:08:25"}
+{"current_steps": 7640, "total_steps": 35625, "loss": 0.6152, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.4559042136635345e-05, "epoch": 1.072280701754386, "percentage": 21.45, "elapsed_time": "0:51:26", "remaining_time": "3:08:23"}
+{"current_steps": 7650, "total_steps": 35625, "loss": 0.4936, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.4545295861206975e-05, "epoch": 1.0736842105263158, "percentage": 21.47, "elapsed_time": "0:51:29", "remaining_time": "3:08:16"}
+{"current_steps": 7660, "total_steps": 35625, "loss": 0.5547, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.453153436909587e-05, "epoch": 1.0750877192982455, "percentage": 21.5, "elapsed_time": "0:51:32", "remaining_time": "3:08:11"}
+{"current_steps": 7670, "total_steps": 35625, "loss": 0.537, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.4517757671015826e-05, "epoch": 1.0764912280701755, "percentage": 21.53, "elapsed_time": "0:51:36", "remaining_time": "3:08:05"}
+{"current_steps": 7680, "total_steps": 35625, "loss": 0.5131, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.4503965777692456e-05, "epoch": 1.0778947368421052, "percentage": 21.56, "elapsed_time": "0:51:39", "remaining_time": "3:07:58"}
+{"current_steps": 7690, "total_steps": 35625, "loss": 0.4782, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.449015869986325e-05, "epoch": 1.079298245614035, "percentage": 21.59, "elapsed_time": "0:51:44", "remaining_time": "3:07:56"}
+{"current_steps": 7700, "total_steps": 35625, "loss": 0.4962, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.447633644827747e-05, "epoch": 1.080701754385965, "percentage": 21.61, "elapsed_time": "0:51:47", "remaining_time": "3:07:49"}
+{"current_steps": 7710, "total_steps": 35625, "loss": 0.5025, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.446249903369621e-05, "epoch": 1.0821052631578947, "percentage": 21.64, "elapsed_time": "0:51:51", "remaining_time": "3:07:46"}
+{"current_steps": 7720, "total_steps": 35625, "loss": 0.4816, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.444864646689239e-05, "epoch": 1.0835087719298246, "percentage": 21.67, "elapsed_time": "0:51:57", "remaining_time": "3:07:47"}
+{"current_steps": 7730, "total_steps": 35625, "loss": 0.4762, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.443477875865071e-05, "epoch": 1.0849122807017544, "percentage": 21.7, "elapsed_time": "0:52:04", "remaining_time": "3:07:53"}
+{"current_steps": 7740, "total_steps": 35625, "loss": 0.4501, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.4420895919767626e-05, "epoch": 1.0863157894736841, "percentage": 21.73, "elapsed_time": "0:52:07", "remaining_time": "3:07:46"}
+{"current_steps": 7750, "total_steps": 35625, "loss": 0.4855, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.440699796105143e-05, "epoch": 1.087719298245614, "percentage": 21.75, "elapsed_time": "0:52:11", "remaining_time": "3:07:42"}
+{"current_steps": 7760, "total_steps": 35625, "loss": 0.5558, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.439308489332215e-05, "epoch": 1.0891228070175438, "percentage": 21.78, "elapsed_time": "0:52:15", "remaining_time": "3:07:39"}
+{"current_steps": 7770, "total_steps": 35625, "loss": 0.5219, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.437915672741158e-05, "epoch": 1.0905263157894738, "percentage": 21.81, "elapsed_time": "0:52:19", "remaining_time": "3:07:34"}
+{"current_steps": 7780, "total_steps": 35625, "loss": 0.4643, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.43652134741633e-05, "epoch": 1.0919298245614035, "percentage": 21.84, "elapsed_time": "0:52:22", "remaining_time": "3:07:28"}
+{"current_steps": 7790, "total_steps": 35625, "loss": 0.562, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.435125514443258e-05, "epoch": 1.0933333333333333, "percentage": 21.87, "elapsed_time": "0:52:25", "remaining_time": "3:07:20"}
+{"current_steps": 7800, "total_steps": 35625, "loss": 0.5022, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.4337281749086477e-05, "epoch": 1.0947368421052632, "percentage": 21.89, "elapsed_time": "0:52:29", "remaining_time": "3:07:16"}
+{"current_steps": 7810, "total_steps": 35625, "loss": 0.5462, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.432329329900375e-05, "epoch": 1.096140350877193, "percentage": 21.92, "elapsed_time": "0:52:33", "remaining_time": "3:07:09"}
+{"current_steps": 7820, "total_steps": 35625, "loss": 0.5103, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.4309289805074895e-05, "epoch": 1.0975438596491227, "percentage": 21.95, "elapsed_time": "0:52:36", "remaining_time": "3:07:02"}
+{"current_steps": 7830, "total_steps": 35625, "loss": 0.5588, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.42952712782021e-05, "epoch": 1.0989473684210527, "percentage": 21.98, "elapsed_time": "0:52:39", "remaining_time": "3:06:56"}
+{"current_steps": 7840, "total_steps": 35625, "loss": 0.5107, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.428123772929928e-05, "epoch": 1.1003508771929824, "percentage": 22.01, "elapsed_time": "0:52:43", "remaining_time": "3:06:52"}
+{"current_steps": 7850, "total_steps": 35625, "loss": 0.463, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.426718916929202e-05, "epoch": 1.1017543859649124, "percentage": 22.04, "elapsed_time": "0:52:47", "remaining_time": "3:06:48"}
+{"current_steps": 7860, "total_steps": 35625, "loss": 0.4801, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.425312560911762e-05, "epoch": 1.1031578947368421, "percentage": 22.06, "elapsed_time": "0:52:52", "remaining_time": "3:06:45"}
+{"current_steps": 7870, "total_steps": 35625, "loss": 0.4755, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.4239047059725035e-05, "epoch": 1.1045614035087719, "percentage": 22.09, "elapsed_time": "0:52:56", "remaining_time": "3:06:42"}
+{"current_steps": 7880, "total_steps": 35625, "loss": 0.4449, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.422495353207491e-05, "epoch": 1.1059649122807018, "percentage": 22.12, "elapsed_time": "0:53:00", "remaining_time": "3:06:39"}
+{"current_steps": 7890, "total_steps": 35625, "loss": 0.4613, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.4210845037139525e-05, "epoch": 1.1073684210526316, "percentage": 22.15, "elapsed_time": "0:53:05", "remaining_time": "3:06:39"}
+{"current_steps": 7900, "total_steps": 35625, "loss": 0.6132, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.419672158590282e-05, "epoch": 1.1087719298245613, "percentage": 22.18, "elapsed_time": "0:53:09", "remaining_time": "3:06:33"}
+{"current_steps": 7910, "total_steps": 35625, "loss": 0.5235, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.4182583189360415e-05, "epoch": 1.1101754385964913, "percentage": 22.2, "elapsed_time": "0:53:14", "remaining_time": "3:06:31"}
+{"current_steps": 7920, "total_steps": 35625, "loss": 0.5066, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.416842985851951e-05, "epoch": 1.111578947368421, "percentage": 22.23, "elapsed_time": "0:53:17", "remaining_time": "3:06:25"}
+{"current_steps": 7930, "total_steps": 35625, "loss": 0.5148, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.415426160439897e-05, "epoch": 1.112982456140351, "percentage": 22.26, "elapsed_time": "0:53:20", "remaining_time": "3:06:18"}
+{"current_steps": 7940, "total_steps": 35625, "loss": 0.4731, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.414007843802927e-05, "epoch": 1.1143859649122807, "percentage": 22.29, "elapsed_time": "0:53:24", "remaining_time": "3:06:12"}
+{"current_steps": 7950, "total_steps": 35625, "loss": 0.4657, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.412588037045248e-05, "epoch": 1.1157894736842104, "percentage": 22.32, "elapsed_time": "0:53:28", "remaining_time": "3:06:08"}
+{"current_steps": 7960, "total_steps": 35625, "loss": 0.5292, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.411166741272228e-05, "epoch": 1.1171929824561404, "percentage": 22.34, "elapsed_time": "0:53:31", "remaining_time": "3:06:00"}
+{"current_steps": 7970, "total_steps": 35625, "loss": 0.5086, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.4097439575903964e-05, "epoch": 1.1185964912280701, "percentage": 22.37, "elapsed_time": "0:53:35", "remaining_time": "3:05:56"}
+{"current_steps": 7980, "total_steps": 35625, "loss": 0.4074, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.408319687107437e-05, "epoch": 1.12, "percentage": 22.4, "elapsed_time": "0:53:40", "remaining_time": "3:05:57"}
+{"current_steps": 7990, "total_steps": 35625, "loss": 0.5302, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.406893930932195e-05, "epoch": 1.1214035087719298, "percentage": 22.43, "elapsed_time": "0:53:43", "remaining_time": "3:05:50"}
+{"current_steps": 8000, "total_steps": 35625, "loss": 0.5533, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.4054666901746685e-05, "epoch": 1.1228070175438596, "percentage": 22.46, "elapsed_time": "0:53:47", "remaining_time": "3:05:43"}
+{"current_steps": 8000, "total_steps": 35625, "loss": null, "eval_loss": 0.6547604203224182, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": null, "epoch": 1.1228070175438596, "percentage": 22.46, "elapsed_time": "0:53:47", "remaining_time": "3:05:43"}
+{"current_steps": 8010, "total_steps": 35625, "loss": 0.5533, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.404037965946015e-05, "epoch": 1.1242105263157895, "percentage": 22.48, "elapsed_time": "0:54:34", "remaining_time": "3:08:10"}
+{"current_steps": 8020, "total_steps": 35625, "loss": 0.573, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.402607759358545e-05, "epoch": 1.1256140350877193, "percentage": 22.51, "elapsed_time": "0:54:38", "remaining_time": "3:08:04"}
+{"current_steps": 8030, "total_steps": 35625, "loss": 0.6002, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.401176071525722e-05, "epoch": 1.127017543859649, "percentage": 22.54, "elapsed_time": "0:54:41", "remaining_time": "3:07:57"}
+{"current_steps": 8040, "total_steps": 35625, "loss": 0.5412, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.399742903562166e-05, "epoch": 1.128421052631579, "percentage": 22.57, "elapsed_time": "0:54:45", "remaining_time": "3:07:52"}
+{"current_steps": 8050, "total_steps": 35625, "loss": 0.5516, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.3983082565836454e-05, "epoch": 1.1298245614035087, "percentage": 22.6, "elapsed_time": "0:54:48", "remaining_time": "3:07:45"}
+{"current_steps": 8060, "total_steps": 35625, "loss": 0.5142, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.3968721317070835e-05, "epoch": 1.1312280701754387, "percentage": 22.62, "elapsed_time": "0:54:52", "remaining_time": "3:07:39"}
+{"current_steps": 8070, "total_steps": 35625, "loss": 0.4974, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.395434530050553e-05, "epoch": 1.1326315789473684, "percentage": 22.65, "elapsed_time": "0:54:56", "remaining_time": "3:07:34"}
+{"current_steps": 8080, "total_steps": 35625, "loss": 0.5921, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.393995452733274e-05, "epoch": 1.1340350877192982, "percentage": 22.68, "elapsed_time": "0:54:59", "remaining_time": "3:07:27"}
+{"current_steps": 8090, "total_steps": 35625, "loss": 0.5516, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.392554900875619e-05, "epoch": 1.1354385964912281, "percentage": 22.71, "elapsed_time": "0:55:04", "remaining_time": "3:07:26"}
+{"current_steps": 8100, "total_steps": 35625, "loss": 0.4377, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.3911128755991085e-05, "epoch": 1.1368421052631579, "percentage": 22.74, "elapsed_time": "0:55:08", "remaining_time": "3:07:22"}
+{"current_steps": 8110, "total_steps": 35625, "loss": 0.5489, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.3896693780264054e-05, "epoch": 1.1382456140350876, "percentage": 22.76, "elapsed_time": "0:55:11", "remaining_time": "3:07:16"}
+{"current_steps": 8120, "total_steps": 35625, "loss": 0.4883, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.388224409281324e-05, "epoch": 1.1396491228070176, "percentage": 22.79, "elapsed_time": "0:55:15", "remaining_time": "3:07:12"}
+{"current_steps": 8130, "total_steps": 35625, "loss": 0.5316, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.3867779704888225e-05, "epoch": 1.1410526315789473, "percentage": 22.82, "elapsed_time": "0:55:19", "remaining_time": "3:07:07"}
+{"current_steps": 8140, "total_steps": 35625, "loss": 0.5961, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.385330062775001e-05, "epoch": 1.1424561403508773, "percentage": 22.85, "elapsed_time": "0:55:23", "remaining_time": "3:07:00"}
+{"current_steps": 8150, "total_steps": 35625, "loss": 0.5839, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.383880687267107e-05, "epoch": 1.143859649122807, "percentage": 22.88, "elapsed_time": "0:55:27", "remaining_time": "3:06:59"}
+{"current_steps": 8160, "total_steps": 35625, "loss": 0.4834, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.3824298450935284e-05, "epoch": 1.1452631578947368, "percentage": 22.91, "elapsed_time": "0:55:32", "remaining_time": "3:06:56"}
+{"current_steps": 8170, "total_steps": 35625, "loss": 0.5543, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.380977537383796e-05, "epoch": 1.1466666666666667, "percentage": 22.93, "elapsed_time": "0:55:35", "remaining_time": "3:06:49"}
+{"current_steps": 8180, "total_steps": 35625, "loss": 0.5548, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.37952376526858e-05, "epoch": 1.1480701754385965, "percentage": 22.96, "elapsed_time": "0:55:38", "remaining_time": "3:06:42"}
+{"current_steps": 8190, "total_steps": 35625, "loss": 0.4596, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.378068529879693e-05, "epoch": 1.1494736842105264, "percentage": 22.99, "elapsed_time": "0:55:43", "remaining_time": "3:06:38"}
+{"current_steps": 8200, "total_steps": 35625, "loss": 0.5165, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.376611832350085e-05, "epoch": 1.1508771929824562, "percentage": 23.02, "elapsed_time": "0:55:46", "remaining_time": "3:06:33"}
+{"current_steps": 8210, "total_steps": 35625, "loss": 0.5085, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.3751536738138454e-05, "epoch": 1.152280701754386, "percentage": 23.05, "elapsed_time": "0:55:50", "remaining_time": "3:06:28"}
+{"current_steps": 8220, "total_steps": 35625, "loss": 0.5485, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.3736940554062e-05, "epoch": 1.1536842105263159, "percentage": 23.07, "elapsed_time": "0:55:56", "remaining_time": "3:06:28"}
+{"current_steps": 8230, "total_steps": 35625, "loss": 0.5049, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.372232978263513e-05, "epoch": 1.1550877192982456, "percentage": 23.1, "elapsed_time": "0:56:00", "remaining_time": "3:06:25"}
+{"current_steps": 8240, "total_steps": 35625, "loss": 0.4833, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.3707704435232816e-05, "epoch": 1.1564912280701753, "percentage": 23.13, "elapsed_time": "0:56:03", "remaining_time": "3:06:17"}
+{"current_steps": 8250, "total_steps": 35625, "loss": 0.5616, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.36930645232414e-05, "epoch": 1.1578947368421053, "percentage": 23.16, "elapsed_time": "0:56:06", "remaining_time": "3:06:11"}
+{"current_steps": 8260, "total_steps": 35625, "loss": 0.5448, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.367841005805855e-05, "epoch": 1.159298245614035, "percentage": 23.19, "elapsed_time": "0:56:10", "remaining_time": "3:06:07"}
+{"current_steps": 8270, "total_steps": 35625, "loss": 0.4714, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.366374105109327e-05, "epoch": 1.1607017543859648, "percentage": 23.21, "elapsed_time": "0:56:14", "remaining_time": "3:06:02"}
+{"current_steps": 8280, "total_steps": 35625, "loss": 0.5994, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.364905751376589e-05, "epoch": 1.1621052631578948, "percentage": 23.24, "elapsed_time": "0:56:17", "remaining_time": "3:05:55"}
+{"current_steps": 8290, "total_steps": 35625, "loss": 0.5633, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.3634359457508046e-05, "epoch": 1.1635087719298245, "percentage": 23.27, "elapsed_time": "0:56:21", "remaining_time": "3:05:48"}
+{"current_steps": 8300, "total_steps": 35625, "loss": 0.5566, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.3619646893762675e-05, "epoch": 1.1649122807017545, "percentage": 23.3, "elapsed_time": "0:56:25", "remaining_time": "3:05:45"}
+{"current_steps": 8310, "total_steps": 35625, "loss": 0.4631, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.360491983398402e-05, "epoch": 1.1663157894736842, "percentage": 23.33, "elapsed_time": "0:56:29", "remaining_time": "3:05:41"}
+{"current_steps": 8320, "total_steps": 35625, "loss": 0.4525, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.3590178289637585e-05, "epoch": 1.167719298245614, "percentage": 23.35, "elapsed_time": "0:56:32", "remaining_time": "3:05:34"}
+{"current_steps": 8330, "total_steps": 35625, "loss": 0.4501, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.357542227220019e-05, "epoch": 1.169122807017544, "percentage": 23.38, "elapsed_time": "0:56:38", "remaining_time": "3:05:34"}
+{"current_steps": 8340, "total_steps": 35625, "loss": 0.561, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.356065179315988e-05, "epoch": 1.1705263157894736, "percentage": 23.41, "elapsed_time": "0:56:42", "remaining_time": "3:05:31"}
+{"current_steps": 8350, "total_steps": 35625, "loss": 0.513, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.354586686401599e-05, "epoch": 1.1719298245614036, "percentage": 23.44, "elapsed_time": "0:56:46", "remaining_time": "3:05:26"}
+{"current_steps": 8360, "total_steps": 35625, "loss": 0.4915, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.353106749627909e-05, "epoch": 1.1733333333333333, "percentage": 23.47, "elapsed_time": "0:56:49", "remaining_time": "3:05:19"}
+{"current_steps": 8370, "total_steps": 35625, "loss": 0.5193, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.3516253701471e-05, "epoch": 1.174736842105263, "percentage": 23.49, "elapsed_time": "0:56:52", "remaining_time": "3:05:12"}
+{"current_steps": 8380, "total_steps": 35625, "loss": 0.5881, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.350142549112476e-05, "epoch": 1.176140350877193, "percentage": 23.52, "elapsed_time": "0:56:57", "remaining_time": "3:05:09"}
+{"current_steps": 8390, "total_steps": 35625, "loss": 0.5378, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.348658287678465e-05, "epoch": 1.1775438596491228, "percentage": 23.55, "elapsed_time": "0:57:01", "remaining_time": "3:05:05"}
+{"current_steps": 8400, "total_steps": 35625, "loss": 0.5356, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.347172587000614e-05, "epoch": 1.1789473684210527, "percentage": 23.58, "elapsed_time": "0:57:04", "remaining_time": "3:05:00"}
+{"current_steps": 8410, "total_steps": 35625, "loss": 0.5849, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.345685448235594e-05, "epoch": 1.1803508771929825, "percentage": 23.61, "elapsed_time": "0:57:10", "remaining_time": "3:05:01"}
+{"current_steps": 8420, "total_steps": 35625, "loss": 0.5157, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.3441968725411905e-05, "epoch": 1.1817543859649122, "percentage": 23.64, "elapsed_time": "0:57:14", "remaining_time": "3:04:56"}
+{"current_steps": 8430, "total_steps": 35625, "loss": 0.6508, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.342706861076313e-05, "epoch": 1.1831578947368422, "percentage": 23.66, "elapsed_time": "0:57:18", "remaining_time": "3:04:52"}
+{"current_steps": 8440, "total_steps": 35625, "loss": 0.525, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.341215415000987e-05, "epoch": 1.184561403508772, "percentage": 23.69, "elapsed_time": "0:57:22", "remaining_time": "3:04:47"}
+{"current_steps": 8450, "total_steps": 35625, "loss": 0.6218, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.339722535476353e-05, "epoch": 1.1859649122807017, "percentage": 23.72, "elapsed_time": "0:57:25", "remaining_time": "3:04:39"}
+{"current_steps": 8460, "total_steps": 35625, "loss": 0.5375, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.3382282236646684e-05, "epoch": 1.1873684210526316, "percentage": 23.75, "elapsed_time": "0:57:28", "remaining_time": "3:04:32"}
+{"current_steps": 8470, "total_steps": 35625, "loss": 0.5454, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.336732480729306e-05, "epoch": 1.1887719298245614, "percentage": 23.78, "elapsed_time": "0:57:32", "remaining_time": "3:04:27"}
+{"current_steps": 8480, "total_steps": 35625, "loss": 0.5507, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.335235307834755e-05, "epoch": 1.190175438596491, "percentage": 23.8, "elapsed_time": "0:57:35", "remaining_time": "3:04:21"}
+{"current_steps": 8490, "total_steps": 35625, "loss": 0.5172, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.333736706146615e-05, "epoch": 1.191578947368421, "percentage": 23.83, "elapsed_time": "0:57:39", "remaining_time": "3:04:16"}
+{"current_steps": 8500, "total_steps": 35625, "loss": 0.5401, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.332236676831598e-05, "epoch": 1.1929824561403508, "percentage": 23.86, "elapsed_time": "0:57:44", "remaining_time": "3:04:15"}
+{"current_steps": 8510, "total_steps": 35625, "loss": 0.4734, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.330735221057529e-05, "epoch": 1.1943859649122808, "percentage": 23.89, "elapsed_time": "0:57:48", "remaining_time": "3:04:11"}
+{"current_steps": 8520, "total_steps": 35625, "loss": 0.4783, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.329232339993342e-05, "epoch": 1.1957894736842105, "percentage": 23.92, "elapsed_time": "0:57:53", "remaining_time": "3:04:11"}
+{"current_steps": 8530, "total_steps": 35625, "loss": 0.5843, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.327728034809082e-05, "epoch": 1.1971929824561403, "percentage": 23.94, "elapsed_time": "0:57:56", "remaining_time": "3:04:03"}
+{"current_steps": 8540, "total_steps": 35625, "loss": 0.4922, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.326222306675902e-05, "epoch": 1.1985964912280702, "percentage": 23.97, "elapsed_time": "0:58:00", "remaining_time": "3:03:58"}
+{"current_steps": 8550, "total_steps": 35625, "loss": 0.6196, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.324715156766064e-05, "epoch": 1.2, "percentage": 24.0, "elapsed_time": "0:58:04", "remaining_time": "3:03:53"}
+{"current_steps": 8560, "total_steps": 35625, "loss": 0.4713, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.3232065862529334e-05, "epoch": 1.20140350877193, "percentage": 24.03, "elapsed_time": "0:58:08", "remaining_time": "3:03:48"}
+{"current_steps": 8570, "total_steps": 35625, "loss": 0.5015, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.321696596310987e-05, "epoch": 1.2028070175438597, "percentage": 24.06, "elapsed_time": "0:58:11", "remaining_time": "3:03:42"}
+{"current_steps": 8580, "total_steps": 35625, "loss": 0.569, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.3201851881158004e-05, "epoch": 1.2042105263157894, "percentage": 24.08, "elapsed_time": "0:58:16", "remaining_time": "3:03:42"}
+{"current_steps": 8590, "total_steps": 35625, "loss": 0.5079, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.31867236284406e-05, "epoch": 1.2056140350877194, "percentage": 24.11, "elapsed_time": "0:58:22", "remaining_time": "3:03:42"}
+{"current_steps": 8600, "total_steps": 35625, "loss": 0.5132, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.31715812167355e-05, "epoch": 1.207017543859649, "percentage": 24.14, "elapsed_time": "0:58:25", "remaining_time": "3:03:37"}
+{"current_steps": 8610, "total_steps": 35625, "loss": 0.5907, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.3156424657831596e-05, "epoch": 1.208421052631579, "percentage": 24.17, "elapsed_time": "0:58:29", "remaining_time": "3:03:30"}
+{"current_steps": 8620, "total_steps": 35625, "loss": 0.6086, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.3141253963528795e-05, "epoch": 1.2098245614035088, "percentage": 24.2, "elapsed_time": "0:58:33", "remaining_time": "3:03:26"}
+{"current_steps": 8630, "total_steps": 35625, "loss": 0.4966, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.3126069145637987e-05, "epoch": 1.2112280701754385, "percentage": 24.22, "elapsed_time": "0:58:37", "remaining_time": "3:03:22"}
+{"current_steps": 8640, "total_steps": 35625, "loss": 0.5713, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.3110870215981095e-05, "epoch": 1.2126315789473685, "percentage": 24.25, "elapsed_time": "0:58:42", "remaining_time": "3:03:20"}
+{"current_steps": 8650, "total_steps": 35625, "loss": 0.5538, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.309565718639098e-05, "epoch": 1.2140350877192982, "percentage": 24.28, "elapsed_time": "0:58:45", "remaining_time": "3:03:14"}
+{"current_steps": 8660, "total_steps": 35625, "loss": 0.5065, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.308043006871153e-05, "epoch": 1.215438596491228, "percentage": 24.31, "elapsed_time": "0:58:49", "remaining_time": "3:03:09"}
+{"current_steps": 8670, "total_steps": 35625, "loss": 0.495, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.306518887479758e-05, "epoch": 1.216842105263158, "percentage": 24.34, "elapsed_time": "0:58:52", "remaining_time": "3:03:03"}
+{"current_steps": 8680, "total_steps": 35625, "loss": 0.5217, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.3049933616514895e-05, "epoch": 1.2182456140350877, "percentage": 24.36, "elapsed_time": "0:58:57", "remaining_time": "3:02:59"}
+{"current_steps": 8690, "total_steps": 35625, "loss": 0.5196, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.303466430574024e-05, "epoch": 1.2196491228070174, "percentage": 24.39, "elapsed_time": "0:59:01", "remaining_time": "3:02:57"}
+{"current_steps": 8700, "total_steps": 35625, "loss": 0.4687, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.301938095436129e-05, "epoch": 1.2210526315789474, "percentage": 24.42, "elapsed_time": "0:59:05", "remaining_time": "3:02:53"}
+{"current_steps": 8710, "total_steps": 35625, "loss": 0.6043, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.300408357427666e-05, "epoch": 1.2224561403508771, "percentage": 24.45, "elapsed_time": "0:59:09", "remaining_time": "3:02:47"}
+{"current_steps": 8720, "total_steps": 35625, "loss": 0.5359, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.298877217739587e-05, "epoch": 1.223859649122807, "percentage": 24.48, "elapsed_time": "0:59:12", "remaining_time": "3:02:41"}
+{"current_steps": 8730, "total_steps": 35625, "loss": 0.4502, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.29734467756394e-05, "epoch": 1.2252631578947368, "percentage": 24.51, "elapsed_time": "0:59:17", "remaining_time": "3:02:38"}
+{"current_steps": 8740, "total_steps": 35625, "loss": 0.4823, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.2958107380938564e-05, "epoch": 1.2266666666666666, "percentage": 24.53, "elapsed_time": "0:59:20", "remaining_time": "3:02:33"}
+{"current_steps": 8750, "total_steps": 35625, "loss": 0.4295, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.294275400523564e-05, "epoch": 1.2280701754385965, "percentage": 24.56, "elapsed_time": "0:59:23", "remaining_time": "3:02:26"}
+{"current_steps": 8760, "total_steps": 35625, "loss": 0.4983, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.2927386660483726e-05, "epoch": 1.2294736842105263, "percentage": 24.59, "elapsed_time": "0:59:27", "remaining_time": "3:02:19"}
+{"current_steps": 8770, "total_steps": 35625, "loss": 0.5405, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.291200535864684e-05, "epoch": 1.2308771929824562, "percentage": 24.62, "elapsed_time": "0:59:31", "remaining_time": "3:02:15"}
+{"current_steps": 8780, "total_steps": 35625, "loss": 0.5094, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.289661011169986e-05, "epoch": 1.232280701754386, "percentage": 24.65, "elapsed_time": "0:59:36", "remaining_time": "3:02:15"}
+{"current_steps": 8790, "total_steps": 35625, "loss": 0.5622, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.28812009316285e-05, "epoch": 1.2336842105263157, "percentage": 24.67, "elapsed_time": "0:59:39", "remaining_time": "3:02:08"}
+{"current_steps": 8800, "total_steps": 35625, "loss": 0.4577, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.286577783042934e-05, "epoch": 1.2350877192982457, "percentage": 24.7, "elapsed_time": "0:59:43", "remaining_time": "3:02:02"}
+{"current_steps": 8810, "total_steps": 35625, "loss": 0.6015, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.285034082010981e-05, "epoch": 1.2364912280701754, "percentage": 24.73, "elapsed_time": "0:59:47", "remaining_time": "3:01:57"}
+{"current_steps": 8820, "total_steps": 35625, "loss": 0.5716, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.2834889912688126e-05, "epoch": 1.2378947368421054, "percentage": 24.76, "elapsed_time": "0:59:50", "remaining_time": "3:01:52"}
+{"current_steps": 8830, "total_steps": 35625, "loss": 0.5634, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.281942512019336e-05, "epoch": 1.2392982456140351, "percentage": 24.79, "elapsed_time": "0:59:54", "remaining_time": "3:01:46"}
+{"current_steps": 8840, "total_steps": 35625, "loss": 0.4982, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.2803946454665376e-05, "epoch": 1.2407017543859649, "percentage": 24.81, "elapsed_time": "0:59:57", "remaining_time": "3:01:39"}
+{"current_steps": 8850, "total_steps": 35625, "loss": 0.5006, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.2788453928154855e-05, "epoch": 1.2421052631578948, "percentage": 24.84, "elapsed_time": "1:00:00", "remaining_time": "3:01:32"}
+{"current_steps": 8860, "total_steps": 35625, "loss": 0.4901, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.2772947552723266e-05, "epoch": 1.2435087719298246, "percentage": 24.87, "elapsed_time": "1:00:05", "remaining_time": "3:01:30"}
+{"current_steps": 8870, "total_steps": 35625, "loss": 0.4847, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.275742734044283e-05, "epoch": 1.2449122807017543, "percentage": 24.9, "elapsed_time": "1:00:09", "remaining_time": "3:01:27"}
+{"current_steps": 8880, "total_steps": 35625, "loss": 0.5224, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.274189330339658e-05, "epoch": 1.2463157894736843, "percentage": 24.93, "elapsed_time": "1:00:15", "remaining_time": "3:01:28"}
+{"current_steps": 8890, "total_steps": 35625, "loss": 0.4698, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.272634545367831e-05, "epoch": 1.247719298245614, "percentage": 24.95, "elapsed_time": "1:00:19", "remaining_time": "3:01:23"}
+{"current_steps": 8900, "total_steps": 35625, "loss": 0.5801, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.271078380339252e-05, "epoch": 1.2491228070175437, "percentage": 24.98, "elapsed_time": "1:00:23", "remaining_time": "3:01:19"}
+{"current_steps": 8910, "total_steps": 35625, "loss": 0.6584, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.269520836465452e-05, "epoch": 1.2505263157894737, "percentage": 25.01, "elapsed_time": "1:00:26", "remaining_time": "3:01:13"}
+{"current_steps": 8920, "total_steps": 35625, "loss": 0.5752, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.2679619149590304e-05, "epoch": 1.2519298245614034, "percentage": 25.04, "elapsed_time": "1:00:29", "remaining_time": "3:01:05"}
+{"current_steps": 8930, "total_steps": 35625, "loss": 0.4829, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.266401617033662e-05, "epoch": 1.2533333333333334, "percentage": 25.07, "elapsed_time": "1:00:32", "remaining_time": "3:00:59"}
+{"current_steps": 8940, "total_steps": 35625, "loss": 0.5411, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.264839943904091e-05, "epoch": 1.2547368421052632, "percentage": 25.09, "elapsed_time": "1:00:36", "remaining_time": "3:00:53"}
+{"current_steps": 8950, "total_steps": 35625, "loss": 0.5089, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.2632768967861345e-05, "epoch": 1.256140350877193, "percentage": 25.12, "elapsed_time": "1:00:40", "remaining_time": "3:00:51"}
+{"current_steps": 8960, "total_steps": 35625, "loss": 0.6257, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.261712476896679e-05, "epoch": 1.2575438596491229, "percentage": 25.15, "elapsed_time": "1:00:45", "remaining_time": "3:00:49"}
+{"current_steps": 8970, "total_steps": 35625, "loss": 0.5403, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.2601466854536774e-05, "epoch": 1.2589473684210526, "percentage": 25.18, "elapsed_time": "1:00:48", "remaining_time": "3:00:42"}
+{"current_steps": 8980, "total_steps": 35625, "loss": 0.5305, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.2585795236761526e-05, "epoch": 1.2603508771929826, "percentage": 25.21, "elapsed_time": "1:00:52", "remaining_time": "3:00:37"}
+{"current_steps": 8990, "total_steps": 35625, "loss": 0.5776, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.257010992784194e-05, "epoch": 1.2617543859649123, "percentage": 25.24, "elapsed_time": "1:00:56", "remaining_time": "3:00:31"}
+{"current_steps": 9000, "total_steps": 35625, "loss": 0.5772, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.255441093998956e-05, "epoch": 1.263157894736842, "percentage": 25.26, "elapsed_time": "1:00:59", "remaining_time": "3:00:25"}
+{"current_steps": 9010, "total_steps": 35625, "loss": 0.589, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.253869828542659e-05, "epoch": 1.264561403508772, "percentage": 25.29, "elapsed_time": "1:01:02", "remaining_time": "3:00:19"}
+{"current_steps": 9020, "total_steps": 35625, "loss": 0.5012, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.2522971976385876e-05, "epoch": 1.2659649122807017, "percentage": 25.32, "elapsed_time": "1:01:06", "remaining_time": "3:00:15"}
+{"current_steps": 9030, "total_steps": 35625, "loss": 0.4813, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.250723202511089e-05, "epoch": 1.2673684210526317, "percentage": 25.35, "elapsed_time": "1:01:10", "remaining_time": "3:00:11"}
+{"current_steps": 9040, "total_steps": 35625, "loss": 0.513, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.2491478443855704e-05, "epoch": 1.2687719298245614, "percentage": 25.38, "elapsed_time": "1:01:13", "remaining_time": "3:00:04"}
+{"current_steps": 9050, "total_steps": 35625, "loss": 0.6229, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.247571124488504e-05, "epoch": 1.2701754385964912, "percentage": 25.4, "elapsed_time": "1:01:17", "remaining_time": "2:59:57"}
+{"current_steps": 9060, "total_steps": 35625, "loss": 0.5493, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.2459930440474194e-05, "epoch": 1.271578947368421, "percentage": 25.43, "elapsed_time": "1:01:20", "remaining_time": "2:59:51"}
+{"current_steps": 9070, "total_steps": 35625, "loss": 0.4845, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.2444136042909064e-05, "epoch": 1.2729824561403509, "percentage": 25.46, "elapsed_time": "1:01:23", "remaining_time": "2:59:45"}
+{"current_steps": 9080, "total_steps": 35625, "loss": 0.5174, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.2428328064486134e-05, "epoch": 1.2743859649122806, "percentage": 25.49, "elapsed_time": "1:01:28", "remaining_time": "2:59:42"}
+{"current_steps": 9090, "total_steps": 35625, "loss": 0.501, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.2412506517512456e-05, "epoch": 1.2757894736842106, "percentage": 25.52, "elapsed_time": "1:01:31", "remaining_time": "2:59:36"}
+{"current_steps": 9100, "total_steps": 35625, "loss": 0.6422, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.239667141430564e-05, "epoch": 1.2771929824561403, "percentage": 25.54, "elapsed_time": "1:01:35", "remaining_time": "2:59:31"}
+{"current_steps": 9110, "total_steps": 35625, "loss": 0.5323, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.238082276719387e-05, "epoch": 1.27859649122807, "percentage": 25.57, "elapsed_time": "1:01:39", "remaining_time": "2:59:26"}
+{"current_steps": 9120, "total_steps": 35625, "loss": 0.4542, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.236496058851585e-05, "epoch": 1.28, "percentage": 25.6, "elapsed_time": "1:01:43", "remaining_time": "2:59:24"}
+{"current_steps": 9130, "total_steps": 35625, "loss": 0.5697, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.234908489062083e-05, "epoch": 1.2814035087719298, "percentage": 25.63, "elapsed_time": "1:01:48", "remaining_time": "2:59:20"}
+{"current_steps": 9140, "total_steps": 35625, "loss": 0.5108, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.233319568586859e-05, "epoch": 1.2828070175438597, "percentage": 25.66, "elapsed_time": "1:01:51", "remaining_time": "2:59:13"}
+{"current_steps": 9150, "total_steps": 35625, "loss": 0.4472, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.231729298662942e-05, "epoch": 1.2842105263157895, "percentage": 25.68, "elapsed_time": "1:01:54", "remaining_time": "2:59:06"}
+{"current_steps": 9160, "total_steps": 35625, "loss": 0.62, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.230137680528411e-05, "epoch": 1.2856140350877192, "percentage": 25.71, "elapsed_time": "1:01:58", "remaining_time": "2:59:04"}
+{"current_steps": 9170, "total_steps": 35625, "loss": 0.5226, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.228544715422395e-05, "epoch": 1.2870175438596492, "percentage": 25.74, "elapsed_time": "1:02:01", "remaining_time": "2:58:57"}
+{"current_steps": 9180, "total_steps": 35625, "loss": 0.5492, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.2269504045850744e-05, "epoch": 1.288421052631579, "percentage": 25.77, "elapsed_time": "1:02:05", "remaining_time": "2:58:51"}
+{"current_steps": 9190, "total_steps": 35625, "loss": 0.5359, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.225354749257673e-05, "epoch": 1.2898245614035089, "percentage": 25.8, "elapsed_time": "1:02:08", "remaining_time": "2:58:45"}
+{"current_steps": 9200, "total_steps": 35625, "loss": 0.535, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.2237577506824624e-05, "epoch": 1.2912280701754386, "percentage": 25.82, "elapsed_time": "1:02:12", "remaining_time": "2:58:39"}
+{"current_steps": 9210, "total_steps": 35625, "loss": 0.4581, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.222159410102761e-05, "epoch": 1.2926315789473684, "percentage": 25.85, "elapsed_time": "1:02:16", "remaining_time": "2:58:35"}
+{"current_steps": 9220, "total_steps": 35625, "loss": 0.5109, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.220559728762933e-05, "epoch": 1.2940350877192983, "percentage": 25.88, "elapsed_time": "1:02:19", "remaining_time": "2:58:29"}
+{"current_steps": 9230, "total_steps": 35625, "loss": 0.4501, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.2189587079083846e-05, "epoch": 1.295438596491228, "percentage": 25.91, "elapsed_time": "1:02:23", "remaining_time": "2:58:24"}
+{"current_steps": 9240, "total_steps": 35625, "loss": 0.5574, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.217356348785565e-05, "epoch": 1.296842105263158, "percentage": 25.94, "elapsed_time": "1:02:27", "remaining_time": "2:58:20"}
+{"current_steps": 9250, "total_steps": 35625, "loss": 0.5558, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.215752652641967e-05, "epoch": 1.2982456140350878, "percentage": 25.96, "elapsed_time": "1:02:30", "remaining_time": "2:58:13"}
+{"current_steps": 9260, "total_steps": 35625, "loss": 0.4734, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.21414762072612e-05, "epoch": 1.2996491228070175, "percentage": 25.99, "elapsed_time": "1:02:35", "remaining_time": "2:58:11"}
+{"current_steps": 9270, "total_steps": 35625, "loss": 0.574, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.2125412542876e-05, "epoch": 1.3010526315789472, "percentage": 26.02, "elapsed_time": "1:02:39", "remaining_time": "2:58:07"}
+{"current_steps": 9280, "total_steps": 35625, "loss": 0.4505, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.210933554577016e-05, "epoch": 1.3024561403508772, "percentage": 26.05, "elapsed_time": "1:02:42", "remaining_time": "2:58:01"}
+{"current_steps": 9290, "total_steps": 35625, "loss": 0.5021, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.209324522846018e-05, "epoch": 1.303859649122807, "percentage": 26.08, "elapsed_time": "1:02:45", "remaining_time": "2:57:54"}
+{"current_steps": 9300, "total_steps": 35625, "loss": 0.4925, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.207714160347292e-05, "epoch": 1.305263157894737, "percentage": 26.11, "elapsed_time": "1:02:48", "remaining_time": "2:57:48"}
+{"current_steps": 9310, "total_steps": 35625, "loss": 0.6289, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.206102468334561e-05, "epoch": 1.3066666666666666, "percentage": 26.13, "elapsed_time": "1:02:51", "remaining_time": "2:57:40"}
+{"current_steps": 9320, "total_steps": 35625, "loss": 0.5387, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.2044894480625825e-05, "epoch": 1.3080701754385964, "percentage": 26.16, "elapsed_time": "1:02:57", "remaining_time": "2:57:41"}
+{"current_steps": 9330, "total_steps": 35625, "loss": 0.5788, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.202875100787147e-05, "epoch": 1.3094736842105263, "percentage": 26.19, "elapsed_time": "1:03:00", "remaining_time": "2:57:35"}
+{"current_steps": 9340, "total_steps": 35625, "loss": 0.5313, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.201259427765081e-05, "epoch": 1.310877192982456, "percentage": 26.22, "elapsed_time": "1:03:05", "remaining_time": "2:57:34"}
+{"current_steps": 9350, "total_steps": 35625, "loss": 0.5948, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.1996424302542404e-05, "epoch": 1.312280701754386, "percentage": 26.25, "elapsed_time": "1:03:09", "remaining_time": "2:57:29"}
+{"current_steps": 9360, "total_steps": 35625, "loss": 0.4913, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.198024109513512e-05, "epoch": 1.3136842105263158, "percentage": 26.27, "elapsed_time": "1:03:13", "remaining_time": "2:57:23"}
+{"current_steps": 9370, "total_steps": 35625, "loss": 0.4895, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.196404466802816e-05, "epoch": 1.3150877192982455, "percentage": 26.3, "elapsed_time": "1:03:17", "remaining_time": "2:57:19"}
+{"current_steps": 9380, "total_steps": 35625, "loss": 0.5537, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.194783503383098e-05, "epoch": 1.3164912280701755, "percentage": 26.33, "elapsed_time": "1:03:20", "remaining_time": "2:57:12"}
+{"current_steps": 9390, "total_steps": 35625, "loss": 0.5641, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.193161220516334e-05, "epoch": 1.3178947368421052, "percentage": 26.36, "elapsed_time": "1:03:24", "remaining_time": "2:57:08"}
+{"current_steps": 9400, "total_steps": 35625, "loss": 0.464, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.191537619465529e-05, "epoch": 1.3192982456140352, "percentage": 26.39, "elapsed_time": "1:03:28", "remaining_time": "2:57:05"}
+{"current_steps": 9410, "total_steps": 35625, "loss": 0.4657, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.189912701494709e-05, "epoch": 1.320701754385965, "percentage": 26.41, "elapsed_time": "1:03:32", "remaining_time": "2:56:59"}
+{"current_steps": 9420, "total_steps": 35625, "loss": 0.5113, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.1882864678689296e-05, "epoch": 1.3221052631578947, "percentage": 26.44, "elapsed_time": "1:03:36", "remaining_time": "2:56:57"}
+{"current_steps": 9430, "total_steps": 35625, "loss": 0.5593, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.186658919854269e-05, "epoch": 1.3235087719298246, "percentage": 26.47, "elapsed_time": "1:03:40", "remaining_time": "2:56:52"}
+{"current_steps": 9440, "total_steps": 35625, "loss": 0.4578, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.1850300587178304e-05, "epoch": 1.3249122807017544, "percentage": 26.5, "elapsed_time": "1:03:44", "remaining_time": "2:56:47"}
+{"current_steps": 9450, "total_steps": 35625, "loss": 0.5637, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.183399885727737e-05, "epoch": 1.3263157894736843, "percentage": 26.53, "elapsed_time": "1:03:47", "remaining_time": "2:56:41"}
+{"current_steps": 9460, "total_steps": 35625, "loss": 0.5491, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.181768402153135e-05, "epoch": 1.327719298245614, "percentage": 26.55, "elapsed_time": "1:03:50", "remaining_time": "2:56:35"}
+{"current_steps": 9470, "total_steps": 35625, "loss": 0.5558, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.1801356092641886e-05, "epoch": 1.3291228070175438, "percentage": 26.58, "elapsed_time": "1:03:57", "remaining_time": "2:56:38"}
+{"current_steps": 9480, "total_steps": 35625, "loss": 0.4543, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.178501508332085e-05, "epoch": 1.3305263157894736, "percentage": 26.61, "elapsed_time": "1:04:01", "remaining_time": "2:56:35"}
+{"current_steps": 9490, "total_steps": 35625, "loss": 0.5832, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.176866100629027e-05, "epoch": 1.3319298245614035, "percentage": 26.64, "elapsed_time": "1:04:05", "remaining_time": "2:56:29"}
+{"current_steps": 9500, "total_steps": 35625, "loss": 0.5378, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.175229387428235e-05, "epoch": 1.3333333333333333, "percentage": 26.67, "elapsed_time": "1:04:08", "remaining_time": "2:56:23"}
+{"current_steps": 9510, "total_steps": 35625, "loss": 0.5046, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.1735913700039477e-05, "epoch": 1.3347368421052632, "percentage": 26.69, "elapsed_time": "1:04:12", "remaining_time": "2:56:20"}
+{"current_steps": 9520, "total_steps": 35625, "loss": 0.5171, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.171952049631416e-05, "epoch": 1.336140350877193, "percentage": 26.72, "elapsed_time": "1:04:17", "remaining_time": "2:56:18"}
+{"current_steps": 9530, "total_steps": 35625, "loss": 0.5939, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.170311427586908e-05, "epoch": 1.3375438596491227, "percentage": 26.75, "elapsed_time": "1:04:21", "remaining_time": "2:56:13"}
+{"current_steps": 9540, "total_steps": 35625, "loss": 0.5768, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.168669505147705e-05, "epoch": 1.3389473684210527, "percentage": 26.78, "elapsed_time": "1:04:25", "remaining_time": "2:56:09"}
+{"current_steps": 9550, "total_steps": 35625, "loss": 0.5029, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.1670262835920996e-05, "epoch": 1.3403508771929824, "percentage": 26.81, "elapsed_time": "1:04:29", "remaining_time": "2:56:05"}
+{"current_steps": 9560, "total_steps": 35625, "loss": 0.4611, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.1653817641993936e-05, "epoch": 1.3417543859649124, "percentage": 26.84, "elapsed_time": "1:04:33", "remaining_time": "2:56:00"}
+{"current_steps": 9570, "total_steps": 35625, "loss": 0.5701, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.163735948249905e-05, "epoch": 1.343157894736842, "percentage": 26.86, "elapsed_time": "1:04:37", "remaining_time": "2:55:57"}
+{"current_steps": 9580, "total_steps": 35625, "loss": 0.5356, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.162088837024956e-05, "epoch": 1.3445614035087718, "percentage": 26.89, "elapsed_time": "1:04:41", "remaining_time": "2:55:51"}
+{"current_steps": 9590, "total_steps": 35625, "loss": 0.4985, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.16044043180688e-05, "epoch": 1.3459649122807018, "percentage": 26.92, "elapsed_time": "1:04:45", "remaining_time": "2:55:48"}
+{"current_steps": 9600, "total_steps": 35625, "loss": 0.5036, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.158790733879017e-05, "epoch": 1.3473684210526315, "percentage": 26.95, "elapsed_time": "1:04:48", "remaining_time": "2:55:41"}
+{"current_steps": 9610, "total_steps": 35625, "loss": 0.5212, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.1571397445257124e-05, "epoch": 1.3487719298245615, "percentage": 26.98, "elapsed_time": "1:04:52", "remaining_time": "2:55:37"}
+{"current_steps": 9620, "total_steps": 35625, "loss": 0.5225, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.155487465032319e-05, "epoch": 1.3501754385964913, "percentage": 27.0, "elapsed_time": "1:04:57", "remaining_time": "2:55:35"}
+{"current_steps": 9630, "total_steps": 35625, "loss": 0.4985, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.153833896685193e-05, "epoch": 1.351578947368421, "percentage": 27.03, "elapsed_time": "1:05:01", "remaining_time": "2:55:30"}
+{"current_steps": 9640, "total_steps": 35625, "loss": 0.5386, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.1521790407716936e-05, "epoch": 1.352982456140351, "percentage": 27.06, "elapsed_time": "1:05:04", "remaining_time": "2:55:25"}
+{"current_steps": 9650, "total_steps": 35625, "loss": 0.5283, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.150522898580183e-05, "epoch": 1.3543859649122807, "percentage": 27.09, "elapsed_time": "1:05:09", "remaining_time": "2:55:22"}
+{"current_steps": 9660, "total_steps": 35625, "loss": 0.5684, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.148865471400024e-05, "epoch": 1.3557894736842107, "percentage": 27.12, "elapsed_time": "1:05:12", "remaining_time": "2:55:17"}
+{"current_steps": 9670, "total_steps": 35625, "loss": 0.525, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.147206760521582e-05, "epoch": 1.3571929824561404, "percentage": 27.14, "elapsed_time": "1:05:16", "remaining_time": "2:55:13"}
+{"current_steps": 9680, "total_steps": 35625, "loss": 0.5258, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.145546767236219e-05, "epoch": 1.3585964912280701, "percentage": 27.17, "elapsed_time": "1:05:20", "remaining_time": "2:55:08"}
+{"current_steps": 9690, "total_steps": 35625, "loss": 0.5159, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.143885492836297e-05, "epoch": 1.3599999999999999, "percentage": 27.2, "elapsed_time": "1:05:24", "remaining_time": "2:55:04"}
+{"current_steps": 9700, "total_steps": 35625, "loss": 0.5656, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.1422229386151754e-05, "epoch": 1.3614035087719298, "percentage": 27.23, "elapsed_time": "1:05:29", "remaining_time": "2:55:02"}
+{"current_steps": 9710, "total_steps": 35625, "loss": 0.488, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.140559105867209e-05, "epoch": 1.3628070175438596, "percentage": 27.26, "elapsed_time": "1:05:33", "remaining_time": "2:54:57"}
+{"current_steps": 9720, "total_steps": 35625, "loss": 0.457, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.1388939958877495e-05, "epoch": 1.3642105263157895, "percentage": 27.28, "elapsed_time": "1:05:37", "remaining_time": "2:54:53"}
+{"current_steps": 9730, "total_steps": 35625, "loss": 0.459, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.137227609973141e-05, "epoch": 1.3656140350877193, "percentage": 27.31, "elapsed_time": "1:05:40", "remaining_time": "2:54:47"}
+{"current_steps": 9740, "total_steps": 35625, "loss": 0.4794, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.135559949420723e-05, "epoch": 1.367017543859649, "percentage": 27.34, "elapsed_time": "1:05:44", "remaining_time": "2:54:41"}
+{"current_steps": 9750, "total_steps": 35625, "loss": 0.5903, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.133891015528826e-05, "epoch": 1.368421052631579, "percentage": 27.37, "elapsed_time": "1:05:47", "remaining_time": "2:54:35"}
+{"current_steps": 9760, "total_steps": 35625, "loss": 0.5521, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.132220809596772e-05, "epoch": 1.3698245614035087, "percentage": 27.4, "elapsed_time": "1:05:51", "remaining_time": "2:54:30"}
+{"current_steps": 9770, "total_steps": 35625, "loss": 0.4667, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.1305493329248734e-05, "epoch": 1.3712280701754387, "percentage": 27.42, "elapsed_time": "1:05:57", "remaining_time": "2:54:32"}
+{"current_steps": 9780, "total_steps": 35625, "loss": 0.4307, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.128876586814433e-05, "epoch": 1.3726315789473684, "percentage": 27.45, "elapsed_time": "1:06:01", "remaining_time": "2:54:27"}
+{"current_steps": 9790, "total_steps": 35625, "loss": 0.5016, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.127202572567741e-05, "epoch": 1.3740350877192982, "percentage": 27.48, "elapsed_time": "1:06:04", "remaining_time": "2:54:22"}
+{"current_steps": 9800, "total_steps": 35625, "loss": 0.5489, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.1255272914880735e-05, "epoch": 1.3754385964912281, "percentage": 27.51, "elapsed_time": "1:06:07", "remaining_time": "2:54:15"}
+{"current_steps": 9810, "total_steps": 35625, "loss": 0.488, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.1238507448796945e-05, "epoch": 1.3768421052631579, "percentage": 27.54, "elapsed_time": "1:06:10", "remaining_time": "2:54:08"}
+{"current_steps": 9820, "total_steps": 35625, "loss": 0.5739, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.122172934047855e-05, "epoch": 1.3782456140350878, "percentage": 27.56, "elapsed_time": "1:06:13", "remaining_time": "2:54:01"}
+{"current_steps": 9830, "total_steps": 35625, "loss": 0.5036, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.120493860298786e-05, "epoch": 1.3796491228070176, "percentage": 27.59, "elapsed_time": "1:06:17", "remaining_time": "2:53:57"}
+{"current_steps": 9840, "total_steps": 35625, "loss": 0.4737, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.1188135249397056e-05, "epoch": 1.3810526315789473, "percentage": 27.62, "elapsed_time": "1:06:20", "remaining_time": "2:53:50"}
+{"current_steps": 9850, "total_steps": 35625, "loss": 0.5341, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.117131929278811e-05, "epoch": 1.3824561403508773, "percentage": 27.65, "elapsed_time": "1:06:24", "remaining_time": "2:53:45"}
+{"current_steps": 9860, "total_steps": 35625, "loss": 0.4567, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.1154490746252825e-05, "epoch": 1.383859649122807, "percentage": 27.68, "elapsed_time": "1:06:27", "remaining_time": "2:53:39"}
+{"current_steps": 9870, "total_steps": 35625, "loss": 0.5586, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.113764962289281e-05, "epoch": 1.385263157894737, "percentage": 27.71, "elapsed_time": "1:06:30", "remaining_time": "2:53:33"}
+{"current_steps": 9880, "total_steps": 35625, "loss": 0.5065, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.112079593581944e-05, "epoch": 1.3866666666666667, "percentage": 27.73, "elapsed_time": "1:06:35", "remaining_time": "2:53:30"}
+{"current_steps": 9890, "total_steps": 35625, "loss": 0.5335, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.110392969815391e-05, "epoch": 1.3880701754385965, "percentage": 27.76, "elapsed_time": "1:06:38", "remaining_time": "2:53:24"}
+{"current_steps": 9900, "total_steps": 35625, "loss": 0.5445, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.108705092302715e-05, "epoch": 1.3894736842105262, "percentage": 27.79, "elapsed_time": "1:06:42", "remaining_time": "2:53:21"}
+{"current_steps": 9910, "total_steps": 35625, "loss": 0.5146, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.1070159623579855e-05, "epoch": 1.3908771929824562, "percentage": 27.82, "elapsed_time": "1:06:48", "remaining_time": "2:53:21"}
+{"current_steps": 9920, "total_steps": 35625, "loss": 0.4938, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.105325581296251e-05, "epoch": 1.392280701754386, "percentage": 27.85, "elapsed_time": "1:06:51", "remaining_time": "2:53:14"}
+{"current_steps": 9930, "total_steps": 35625, "loss": 0.5353, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.103633950433528e-05, "epoch": 1.3936842105263159, "percentage": 27.87, "elapsed_time": "1:06:55", "remaining_time": "2:53:11"}
+{"current_steps": 9940, "total_steps": 35625, "loss": 0.4869, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.1019410710868115e-05, "epoch": 1.3950877192982456, "percentage": 27.9, "elapsed_time": "1:06:59", "remaining_time": "2:53:05"}
+{"current_steps": 9950, "total_steps": 35625, "loss": 0.4858, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.100246944574064e-05, "epoch": 1.3964912280701753, "percentage": 27.93, "elapsed_time": "1:07:04", "remaining_time": "2:53:04"}
+{"current_steps": 9960, "total_steps": 35625, "loss": 0.5173, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.098551572214223e-05, "epoch": 1.3978947368421053, "percentage": 27.96, "elapsed_time": "1:07:09", "remaining_time": "2:53:02"}
+{"current_steps": 9970, "total_steps": 35625, "loss": 0.5862, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.0968549553271926e-05, "epoch": 1.399298245614035, "percentage": 27.99, "elapsed_time": "1:07:12", "remaining_time": "2:52:55"}
+{"current_steps": 9980, "total_steps": 35625, "loss": 0.5312, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.095157095233848e-05, "epoch": 1.400701754385965, "percentage": 28.01, "elapsed_time": "1:07:16", "remaining_time": "2:52:51"}
+{"current_steps": 9990, "total_steps": 35625, "loss": 0.4668, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.093457993256031e-05, "epoch": 1.4021052631578947, "percentage": 28.04, "elapsed_time": "1:07:20", "remaining_time": "2:52:48"}
+{"current_steps": 10000, "total_steps": 35625, "loss": 0.5192, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.0917576507165514e-05, "epoch": 1.4035087719298245, "percentage": 28.07, "elapsed_time": "1:07:24", "remaining_time": "2:52:43"}
+{"current_steps": 10000, "total_steps": 35625, "loss": null, "eval_loss": 0.6501449942588806, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": null, "epoch": 1.4035087719298245, "percentage": 28.07, "elapsed_time": "1:07:24", "remaining_time": "2:52:43"}
+{"current_steps": 10010, "total_steps": 35625, "loss": 0.6369, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.090056068939183e-05, "epoch": 1.4049122807017544, "percentage": 28.1, "elapsed_time": "1:08:12", "remaining_time": "2:54:31"}
+{"current_steps": 10020, "total_steps": 35625, "loss": 0.4765, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.088353249248667e-05, "epoch": 1.4063157894736842, "percentage": 28.13, "elapsed_time": "1:08:15", "remaining_time": "2:54:26"}
+{"current_steps": 10030, "total_steps": 35625, "loss": 0.4858, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.0866491929707064e-05, "epoch": 1.4077192982456141, "percentage": 28.15, "elapsed_time": "1:08:20", "remaining_time": "2:54:22"}
+{"current_steps": 10040, "total_steps": 35625, "loss": 0.4502, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.084943901431966e-05, "epoch": 1.4091228070175439, "percentage": 28.18, "elapsed_time": "1:08:23", "remaining_time": "2:54:16"}
+{"current_steps": 10050, "total_steps": 35625, "loss": 0.5036, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.083237375960075e-05, "epoch": 1.4105263157894736, "percentage": 28.21, "elapsed_time": "1:08:27", "remaining_time": "2:54:12"}
+{"current_steps": 10060, "total_steps": 35625, "loss": 0.5185, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.081529617883622e-05, "epoch": 1.4119298245614036, "percentage": 28.24, "elapsed_time": "1:08:32", "remaining_time": "2:54:09"}
+{"current_steps": 10070, "total_steps": 35625, "loss": 0.4701, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.079820628532155e-05, "epoch": 1.4133333333333333, "percentage": 28.27, "elapsed_time": "1:08:35", "remaining_time": "2:54:04"}
+{"current_steps": 10080, "total_steps": 35625, "loss": 0.5406, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.0781104092361813e-05, "epoch": 1.4147368421052633, "percentage": 28.29, "elapsed_time": "1:08:38", "remaining_time": "2:53:57"}
+{"current_steps": 10090, "total_steps": 35625, "loss": 0.4963, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.0763989613271635e-05, "epoch": 1.416140350877193, "percentage": 28.32, "elapsed_time": "1:08:42", "remaining_time": "2:53:53"}
+{"current_steps": 10100, "total_steps": 35625, "loss": 0.5931, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.0746862861375245e-05, "epoch": 1.4175438596491228, "percentage": 28.35, "elapsed_time": "1:08:46", "remaining_time": "2:53:48"}
+{"current_steps": 10110, "total_steps": 35625, "loss": 0.4908, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.07297238500064e-05, "epoch": 1.4189473684210525, "percentage": 28.38, "elapsed_time": "1:08:50", "remaining_time": "2:53:44"}
+{"current_steps": 10120, "total_steps": 35625, "loss": 0.5732, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.0712572592508394e-05, "epoch": 1.4203508771929825, "percentage": 28.41, "elapsed_time": "1:08:54", "remaining_time": "2:53:38"}
+{"current_steps": 10130, "total_steps": 35625, "loss": 0.5323, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.069540910223409e-05, "epoch": 1.4217543859649122, "percentage": 28.44, "elapsed_time": "1:08:57", "remaining_time": "2:53:34"}
+{"current_steps": 10140, "total_steps": 35625, "loss": 0.5727, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.067823339254584e-05, "epoch": 1.4231578947368422, "percentage": 28.46, "elapsed_time": "1:09:02", "remaining_time": "2:53:31"}
+{"current_steps": 10150, "total_steps": 35625, "loss": 0.5295, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.066104547681553e-05, "epoch": 1.424561403508772, "percentage": 28.49, "elapsed_time": "1:09:05", "remaining_time": "2:53:25"}
+{"current_steps": 10160, "total_steps": 35625, "loss": 0.554, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.0643845368424545e-05, "epoch": 1.4259649122807017, "percentage": 28.52, "elapsed_time": "1:09:10", "remaining_time": "2:53:24"}
+{"current_steps": 10170, "total_steps": 35625, "loss": 0.5138, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.062663308076374e-05, "epoch": 1.4273684210526316, "percentage": 28.55, "elapsed_time": "1:09:14", "remaining_time": "2:53:17"}
+{"current_steps": 10180, "total_steps": 35625, "loss": 0.543, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.0609408627233494e-05, "epoch": 1.4287719298245614, "percentage": 28.58, "elapsed_time": "1:09:18", "remaining_time": "2:53:15"}
+{"current_steps": 10190, "total_steps": 35625, "loss": 0.5094, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.059217202124361e-05, "epoch": 1.4301754385964913, "percentage": 28.6, "elapsed_time": "1:09:21", "remaining_time": "2:53:08"}
+{"current_steps": 10200, "total_steps": 35625, "loss": 0.5468, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.0574923276213405e-05, "epoch": 1.431578947368421, "percentage": 28.63, "elapsed_time": "1:09:26", "remaining_time": "2:53:04"}
+{"current_steps": 10210, "total_steps": 35625, "loss": 0.6082, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.0557662405571595e-05, "epoch": 1.4329824561403508, "percentage": 28.66, "elapsed_time": "1:09:29", "remaining_time": "2:52:58"}
+{"current_steps": 10220, "total_steps": 35625, "loss": 0.5164, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.054038942275637e-05, "epoch": 1.4343859649122808, "percentage": 28.69, "elapsed_time": "1:09:32", "remaining_time": "2:52:51"}
+{"current_steps": 10230, "total_steps": 35625, "loss": 0.5451, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.052310434121533e-05, "epoch": 1.4357894736842105, "percentage": 28.72, "elapsed_time": "1:09:36", "remaining_time": "2:52:47"}
+{"current_steps": 10240, "total_steps": 35625, "loss": 0.5821, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.050580717440552e-05, "epoch": 1.4371929824561405, "percentage": 28.74, "elapsed_time": "1:09:39", "remaining_time": "2:52:41"}
+{"current_steps": 10250, "total_steps": 35625, "loss": 0.5088, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.048849793579337e-05, "epoch": 1.4385964912280702, "percentage": 28.77, "elapsed_time": "1:09:43", "remaining_time": "2:52:37"}
+{"current_steps": 10260, "total_steps": 35625, "loss": 0.5441, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.04711766388547e-05, "epoch": 1.44, "percentage": 28.8, "elapsed_time": "1:09:47", "remaining_time": "2:52:31"}
+{"current_steps": 10270, "total_steps": 35625, "loss": 0.494, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.0453843297074756e-05, "epoch": 1.4414035087719297, "percentage": 28.83, "elapsed_time": "1:09:51", "remaining_time": "2:52:28"}
+{"current_steps": 10280, "total_steps": 35625, "loss": 0.571, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.043649792394812e-05, "epoch": 1.4428070175438596, "percentage": 28.86, "elapsed_time": "1:09:55", "remaining_time": "2:52:22"}
+{"current_steps": 10290, "total_steps": 35625, "loss": 0.5845, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.041914053297878e-05, "epoch": 1.4442105263157896, "percentage": 28.88, "elapsed_time": "1:09:58", "remaining_time": "2:52:17"}
+{"current_steps": 10300, "total_steps": 35625, "loss": 0.4655, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.0401771137680046e-05, "epoch": 1.4456140350877194, "percentage": 28.91, "elapsed_time": "1:10:01", "remaining_time": "2:52:11"}
+{"current_steps": 10310, "total_steps": 35625, "loss": 0.4939, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.038438975157458e-05, "epoch": 1.447017543859649, "percentage": 28.94, "elapsed_time": "1:10:05", "remaining_time": "2:52:05"}
+{"current_steps": 10320, "total_steps": 35625, "loss": 0.6172, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.036699638819441e-05, "epoch": 1.4484210526315788, "percentage": 28.97, "elapsed_time": "1:10:09", "remaining_time": "2:52:02"}
+{"current_steps": 10330, "total_steps": 35625, "loss": 0.4888, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.0349591061080846e-05, "epoch": 1.4498245614035088, "percentage": 29.0, "elapsed_time": "1:10:17", "remaining_time": "2:52:07"}
+{"current_steps": 10340, "total_steps": 35625, "loss": 0.4427, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.0332173783784536e-05, "epoch": 1.4512280701754385, "percentage": 29.02, "elapsed_time": "1:10:26", "remaining_time": "2:52:14"}
+{"current_steps": 10350, "total_steps": 35625, "loss": 0.4867, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.031474456986543e-05, "epoch": 1.4526315789473685, "percentage": 29.05, "elapsed_time": "1:10:36", "remaining_time": "2:52:26"}
+{"current_steps": 10360, "total_steps": 35625, "loss": 0.4401, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.0297303432892775e-05, "epoch": 1.4540350877192982, "percentage": 29.08, "elapsed_time": "1:10:44", "remaining_time": "2:52:31"}
+{"current_steps": 10370, "total_steps": 35625, "loss": 0.546, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.027985038644507e-05, "epoch": 1.455438596491228, "percentage": 29.11, "elapsed_time": "1:10:52", "remaining_time": "2:52:35"}
+{"current_steps": 10380, "total_steps": 35625, "loss": 0.5211, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.026238544411014e-05, "epoch": 1.456842105263158, "percentage": 29.14, "elapsed_time": "1:11:00", "remaining_time": "2:52:40"}
+{"current_steps": 10390, "total_steps": 35625, "loss": 0.4633, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.024490861948503e-05, "epoch": 1.4582456140350877, "percentage": 29.16, "elapsed_time": "1:11:11", "remaining_time": "2:52:53"}
+{"current_steps": 10400, "total_steps": 35625, "loss": 0.5898, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.022741992617603e-05, "epoch": 1.4596491228070176, "percentage": 29.19, "elapsed_time": "1:11:18", "remaining_time": "2:52:58"}
+{"current_steps": 10410, "total_steps": 35625, "loss": 0.4944, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.020991937779872e-05, "epoch": 1.4610526315789474, "percentage": 29.22, "elapsed_time": "1:11:30", "remaining_time": "2:53:11"}
+{"current_steps": 10420, "total_steps": 35625, "loss": 0.55, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.019240698797785e-05, "epoch": 1.4624561403508771, "percentage": 29.25, "elapsed_time": "1:11:41", "remaining_time": "2:53:23"}
+{"current_steps": 10430, "total_steps": 35625, "loss": 0.5103, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.017488277034742e-05, "epoch": 1.463859649122807, "percentage": 29.28, "elapsed_time": "1:11:50", "remaining_time": "2:53:31"}
+{"current_steps": 10440, "total_steps": 35625, "loss": 0.5073, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.015734673855065e-05, "epoch": 1.4652631578947368, "percentage": 29.31, "elapsed_time": "1:11:59", "remaining_time": "2:53:39"}
+{"current_steps": 10450, "total_steps": 35625, "loss": 0.5588, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.013979890623992e-05, "epoch": 1.4666666666666668, "percentage": 29.33, "elapsed_time": "1:12:07", "remaining_time": "2:53:44"}
+{"current_steps": 10460, "total_steps": 35625, "loss": 0.5984, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.0122239287076834e-05, "epoch": 1.4680701754385965, "percentage": 29.36, "elapsed_time": "1:12:16", "remaining_time": "2:53:52"}
+{"current_steps": 10470, "total_steps": 35625, "loss": 0.5437, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.010466789473215e-05, "epoch": 1.4694736842105263, "percentage": 29.39, "elapsed_time": "1:12:24", "remaining_time": "2:53:57"}
+{"current_steps": 10480, "total_steps": 35625, "loss": 0.4573, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.008708474288581e-05, "epoch": 1.470877192982456, "percentage": 29.42, "elapsed_time": "1:12:34", "remaining_time": "2:54:08"}
+{"current_steps": 10490, "total_steps": 35625, "loss": 0.5319, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.006948984522687e-05, "epoch": 1.472280701754386, "percentage": 29.45, "elapsed_time": "1:12:42", "remaining_time": "2:54:11"}
+{"current_steps": 10500, "total_steps": 35625, "loss": 0.4559, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.00518832154536e-05, "epoch": 1.4736842105263157, "percentage": 29.47, "elapsed_time": "1:12:55", "remaining_time": "2:54:28"}
+{"current_steps": 10510, "total_steps": 35625, "loss": 0.4776, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.003426486727335e-05, "epoch": 1.4750877192982457, "percentage": 29.5, "elapsed_time": "1:13:04", "remaining_time": "2:54:37"}
+{"current_steps": 10520, "total_steps": 35625, "loss": 0.5672, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.00166348144026e-05, "epoch": 1.4764912280701754, "percentage": 29.53, "elapsed_time": "1:13:12", "remaining_time": "2:54:42"}
+{"current_steps": 10530, "total_steps": 35625, "loss": 0.5926, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 3.9998993070566954e-05, "epoch": 1.4778947368421052, "percentage": 29.56, "elapsed_time": "1:13:20", "remaining_time": "2:54:47"}
+{"current_steps": 10540, "total_steps": 35625, "loss": 0.4975, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 3.998133964950112e-05, "epoch": 1.4792982456140351, "percentage": 29.59, "elapsed_time": "1:13:28", "remaining_time": "2:54:51"}
+{"current_steps": 10550, "total_steps": 35625, "loss": 0.5229, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 3.9963674564948886e-05, "epoch": 1.4807017543859649, "percentage": 29.61, "elapsed_time": "1:13:38", "remaining_time": "2:55:02"}
+{"current_steps": 10560, "total_steps": 35625, "loss": 0.4995, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 3.9945997830663126e-05, "epoch": 1.4821052631578948, "percentage": 29.64, "elapsed_time": "1:13:46", "remaining_time": "2:55:05"}
+{"current_steps": 10570, "total_steps": 35625, "loss": 0.5464, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 3.992830946040579e-05, "epoch": 1.4835087719298246, "percentage": 29.67, "elapsed_time": "1:13:54", "remaining_time": "2:55:12"}
+{"current_steps": 10580, "total_steps": 35625, "loss": 0.5691, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 3.9910609467947866e-05, "epoch": 1.4849122807017543, "percentage": 29.7, "elapsed_time": "1:14:02", "remaining_time": "2:55:16"}
+{"current_steps": 10590, "total_steps": 35625, "loss": 0.5982, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 3.989289786706942e-05, "epoch": 1.4863157894736843, "percentage": 29.73, "elapsed_time": "1:14:10", "remaining_time": "2:55:20"}
+{"current_steps": 10600, "total_steps": 35625, "loss": 0.5173, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 3.987517467155954e-05, "epoch": 1.487719298245614, "percentage": 29.75, "elapsed_time": "1:14:18", "remaining_time": "2:55:25"}
+{"current_steps": 10610, "total_steps": 35625, "loss": 0.4406, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 3.985743989521633e-05, "epoch": 1.489122807017544, "percentage": 29.78, "elapsed_time": "1:14:26", "remaining_time": "2:55:30"}
+{"current_steps": 10620, "total_steps": 35625, "loss": 0.5417, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 3.9839693551846924e-05, "epoch": 1.4905263157894737, "percentage": 29.81, "elapsed_time": "1:14:34", "remaining_time": "2:55:34"}
+{"current_steps": 10630, "total_steps": 35625, "loss": 0.6226, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 3.982193565526747e-05, "epoch": 1.4919298245614034, "percentage": 29.84, "elapsed_time": "1:14:43", "remaining_time": "2:55:41"}
+{"current_steps": 10640, "total_steps": 35625, "loss": 0.5337, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 3.9804166219303086e-05, "epoch": 1.4933333333333334, "percentage": 29.87, "elapsed_time": "1:14:53", "remaining_time": "2:55:52"}
+{"current_steps": 10650, "total_steps": 35625, "loss": 0.5027, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 3.9786385257787886e-05, "epoch": 1.4947368421052631, "percentage": 29.89, "elapsed_time": "1:15:03", "remaining_time": "2:56:02"}
+{"current_steps": 10660, "total_steps": 35625, "loss": 0.5676, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 3.9768592784564974e-05, "epoch": 1.496140350877193, "percentage": 29.92, "elapsed_time": "1:15:10", "remaining_time": "2:56:03"}
+{"current_steps": 10670, "total_steps": 35625, "loss": 0.4542, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 3.975078881348638e-05, "epoch": 1.4975438596491228, "percentage": 29.95, "elapsed_time": "1:15:21", "remaining_time": "2:56:14"}
+{"current_steps": 10680, "total_steps": 35625, "loss": 0.5563, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 3.9732973358413115e-05, "epoch": 1.4989473684210526, "percentage": 29.98, "elapsed_time": "1:15:33", "remaining_time": "2:56:29"}
+{"current_steps": 10690, "total_steps": 35625, "loss": 0.4128, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 3.971514643321513e-05, "epoch": 1.5003508771929823, "percentage": 30.01, "elapsed_time": "1:15:41", "remaining_time": "2:56:33"}
+{"current_steps": 10700, "total_steps": 35625, "loss": 0.4909, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 3.969730805177129e-05, "epoch": 1.5017543859649123, "percentage": 30.04, "elapsed_time": "1:15:51", "remaining_time": "2:56:42"}
+{"current_steps": 10710, "total_steps": 35625, "loss": 0.5664, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 3.967945822796938e-05, "epoch": 1.5031578947368422, "percentage": 30.06, "elapsed_time": "1:16:02", "remaining_time": "2:56:53"}
+{"current_steps": 10720, "total_steps": 35625, "loss": 0.5827, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 3.9661596975706104e-05, "epoch": 1.504561403508772, "percentage": 30.09, "elapsed_time": "1:16:12", "remaining_time": "2:57:02"}
+{"current_steps": 10730, "total_steps": 35625, "loss": 0.5105, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 3.9643724308887065e-05, "epoch": 1.5059649122807017, "percentage": 30.12, "elapsed_time": "1:16:20", "remaining_time": "2:57:08"}
+{"current_steps": 10740, "total_steps": 35625, "loss": 0.5455, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 3.962584024142675e-05, "epoch": 1.5073684210526315, "percentage": 30.15, "elapsed_time": "1:16:31", "remaining_time": "2:57:18"}
+{"current_steps": 10750, "total_steps": 35625, "loss": 0.5419, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 3.96079447872485e-05, "epoch": 1.5087719298245614, "percentage": 30.18, "elapsed_time": "1:16:39", "remaining_time": "2:57:23"}
+{"current_steps": 10760, "total_steps": 35625, "loss": 0.5861, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 3.9590037960284546e-05, "epoch": 1.5101754385964914, "percentage": 30.2, "elapsed_time": "1:16:49", "remaining_time": "2:57:31"}
+{"current_steps": 10770, "total_steps": 35625, "loss": 0.5692, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 3.9572119774475975e-05, "epoch": 1.5115789473684211, "percentage": 30.23, "elapsed_time": "1:16:58", "remaining_time": "2:57:38"}
+{"current_steps": 10780, "total_steps": 35625, "loss": 0.5345, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 3.95541902437727e-05, "epoch": 1.5129824561403509, "percentage": 30.26, "elapsed_time": "1:17:08", "remaining_time": "2:57:46"}
+{"current_steps": 10790, "total_steps": 35625, "loss": 0.5212, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 3.953624938213348e-05, "epoch": 1.5143859649122806, "percentage": 30.29, "elapsed_time": "1:17:18", "remaining_time": "2:57:57"}
+{"current_steps": 10800, "total_steps": 35625, "loss": 0.4838, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 3.95182972035259e-05, "epoch": 1.5157894736842106, "percentage": 30.32, "elapsed_time": "1:17:29", "remaining_time": "2:58:06"}
+{"current_steps": 10810, "total_steps": 35625, "loss": 0.5011, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 3.950033372192633e-05, "epoch": 1.5171929824561403, "percentage": 30.34, "elapsed_time": "1:17:36", "remaining_time": "2:58:10"}
+{"current_steps": 10820, "total_steps": 35625, "loss": 0.5043, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 3.948235895131997e-05, "epoch": 1.5185964912280703, "percentage": 30.37, "elapsed_time": "1:17:46", "remaining_time": "2:58:17"}
+{"current_steps": 10830, "total_steps": 35625, "loss": 0.5062, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 3.946437290570078e-05, "epoch": 1.52, "percentage": 30.4, "elapsed_time": "1:17:57", "remaining_time": "2:58:28"}
+{"current_steps": 10840, "total_steps": 35625, "loss": 0.6164, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 3.944637559907152e-05, "epoch": 1.5214035087719298, "percentage": 30.43, "elapsed_time": "1:18:04", "remaining_time": "2:58:30"}
+{"current_steps": 10850, "total_steps": 35625, "loss": 0.5159, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 3.9428367045443704e-05, "epoch": 1.5228070175438595, "percentage": 30.46, "elapsed_time": "1:18:14", "remaining_time": "2:58:38"}
+{"current_steps": 10860, "total_steps": 35625, "loss": 0.6505, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 3.941034725883762e-05, "epoch": 1.5242105263157895, "percentage": 30.48, "elapsed_time": "1:18:21", "remaining_time": "2:58:41"}
+{"current_steps": 10870, "total_steps": 35625, "loss": 0.4808, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 3.939231625328229e-05, "epoch": 1.5256140350877194, "percentage": 30.51, "elapsed_time": "1:18:34", "remaining_time": "2:58:56"}
+{"current_steps": 10880, "total_steps": 35625, "loss": 0.5194, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 3.9374274042815465e-05, "epoch": 1.5270175438596492, "percentage": 30.54, "elapsed_time": "1:18:41", "remaining_time": "2:58:59"}
+{"current_steps": 10890, "total_steps": 35625, "loss": 0.5079, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 3.935622064148361e-05, "epoch": 1.528421052631579, "percentage": 30.57, "elapsed_time": "1:18:51", "remaining_time": "2:59:07"}
+{"current_steps": 10900, "total_steps": 35625, "loss": 0.4808, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 3.9338156063341946e-05, "epoch": 1.5298245614035086, "percentage": 30.6, "elapsed_time": "1:19:00", "remaining_time": "2:59:12"}
+{"current_steps": 10910, "total_steps": 35625, "loss": 0.429, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 3.932008032245434e-05, "epoch": 1.5312280701754386, "percentage": 30.62, "elapsed_time": "1:19:07", "remaining_time": "2:59:14"}
+{"current_steps": 10920, "total_steps": 35625, "loss": 0.489, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 3.930199343289339e-05, "epoch": 1.5326315789473686, "percentage": 30.65, "elapsed_time": "1:19:17", "remaining_time": "2:59:23"}
+{"current_steps": 10930, "total_steps": 35625, "loss": 0.4881, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 3.9283895408740355e-05, "epoch": 1.5340350877192983, "percentage": 30.68, "elapsed_time": "1:19:26", "remaining_time": "2:59:30"}
+{"current_steps": 10940, "total_steps": 35625, "loss": 0.5913, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 3.926578626408517e-05, "epoch": 1.535438596491228, "percentage": 30.71, "elapsed_time": "1:19:34", "remaining_time": "2:59:33"}
+{"current_steps": 10950, "total_steps": 35625, "loss": 0.4719, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 3.924766601302642e-05, "epoch": 1.5368421052631578, "percentage": 30.74, "elapsed_time": "1:19:44", "remaining_time": "2:59:40"}
+{"current_steps": 10960, "total_steps": 35625, "loss": 0.5445, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 3.9229534669671344e-05, "epoch": 1.5382456140350877, "percentage": 30.76, "elapsed_time": "1:19:53", "remaining_time": "2:59:46"}
+{"current_steps": 10970, "total_steps": 35625, "loss": 0.4989, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 3.9211392248135815e-05, "epoch": 1.5396491228070175, "percentage": 30.79, "elapsed_time": "1:20:01", "remaining_time": "2:59:50"}
+{"current_steps": 10980, "total_steps": 35625, "loss": 0.5321, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 3.9193238762544325e-05, "epoch": 1.5410526315789475, "percentage": 30.82, "elapsed_time": "1:20:11", "remaining_time": "2:59:58"}
+{"current_steps": 10990, "total_steps": 35625, "loss": 0.4765, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 3.9175074227029996e-05, "epoch": 1.5424561403508772, "percentage": 30.85, "elapsed_time": "1:20:20", "remaining_time": "3:00:05"}
+{"current_steps": 11000, "total_steps": 35625, "loss": 0.5748, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 3.915689865573454e-05, "epoch": 1.543859649122807, "percentage": 30.88, "elapsed_time": "1:20:28", "remaining_time": "3:00:10"}
+{"current_steps": 11010, "total_steps": 35625, "loss": 0.7091, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 3.913871206280824e-05, "epoch": 1.545263157894737, "percentage": 30.91, "elapsed_time": "1:20:39", "remaining_time": "3:00:18"}
+{"current_steps": 11020, "total_steps": 35625, "loss": 0.507, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 3.912051446241001e-05, "epoch": 1.5466666666666666, "percentage": 30.93, "elapsed_time": "1:20:47", "remaining_time": "3:00:23"}
+{"current_steps": 11030, "total_steps": 35625, "loss": 0.5738, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 3.910230586870729e-05, "epoch": 1.5480701754385966, "percentage": 30.96, "elapsed_time": "1:20:55", "remaining_time": "3:00:27"}
+{"current_steps": 11040, "total_steps": 35625, "loss": 0.4437, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 3.90840862958761e-05, "epoch": 1.5494736842105263, "percentage": 30.99, "elapsed_time": "1:21:03", "remaining_time": "3:00:31"}
+{"current_steps": 11050, "total_steps": 35625, "loss": 0.4859, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 3.9065855758101e-05, "epoch": 1.550877192982456, "percentage": 31.02, "elapsed_time": "1:21:16", "remaining_time": "3:00:46"}
+{"current_steps": 11060, "total_steps": 35625, "loss": 0.5433, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 3.904761426957509e-05, "epoch": 1.5522807017543858, "percentage": 31.05, "elapsed_time": "1:21:27", "remaining_time": "3:00:56"}
+{"current_steps": 11070, "total_steps": 35625, "loss": 0.5938, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 3.902936184449999e-05, "epoch": 1.5536842105263158, "percentage": 31.07, "elapsed_time": "1:21:36", "remaining_time": "3:01:00"}
+{"current_steps": 11080, "total_steps": 35625, "loss": 0.4484, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 3.901109849708585e-05, "epoch": 1.5550877192982457, "percentage": 31.1, "elapsed_time": "1:21:46", "remaining_time": "3:01:08"}
+{"current_steps": 11090, "total_steps": 35625, "loss": 0.4353, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 3.8992824241551295e-05, "epoch": 1.5564912280701755, "percentage": 31.13, "elapsed_time": "1:21:55", "remaining_time": "3:01:13"}
+{"current_steps": 11100, "total_steps": 35625, "loss": 0.4497, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 3.897453909212348e-05, "epoch": 1.5578947368421052, "percentage": 31.16, "elapsed_time": "1:22:03", "remaining_time": "3:01:17"}
+{"current_steps": 11110, "total_steps": 35625, "loss": 0.4648, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 3.895624306303799e-05, "epoch": 1.559298245614035, "percentage": 31.19, "elapsed_time": "1:22:10", "remaining_time": "3:01:20"}
+{"current_steps": 11120, "total_steps": 35625, "loss": 0.5921, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 3.893793616853894e-05, "epoch": 1.560701754385965, "percentage": 31.21, "elapsed_time": "1:22:18", "remaining_time": "3:01:22"}
+{"current_steps": 11130, "total_steps": 35625, "loss": 0.4611, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 3.891961842287886e-05, "epoch": 1.5621052631578949, "percentage": 31.24, "elapsed_time": "1:22:25", "remaining_time": "3:01:25"}
+{"current_steps": 11140, "total_steps": 35625, "loss": 0.4745, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 3.890128984031876e-05, "epoch": 1.5635087719298246, "percentage": 31.27, "elapsed_time": "1:22:35", "remaining_time": "3:01:32"}
+{"current_steps": 11150, "total_steps": 35625, "loss": 0.5716, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 3.888295043512804e-05, "epoch": 1.5649122807017544, "percentage": 31.3, "elapsed_time": "1:22:48", "remaining_time": "3:01:45"}
+{"current_steps": 11160, "total_steps": 35625, "loss": 0.5193, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 3.886460022158458e-05, "epoch": 1.566315789473684, "percentage": 31.33, "elapsed_time": "1:22:55", "remaining_time": "3:01:47"}
+{"current_steps": 11170, "total_steps": 35625, "loss": 0.4974, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 3.884623921397463e-05, "epoch": 1.567719298245614, "percentage": 31.35, "elapsed_time": "1:23:02", "remaining_time": "3:01:49"}
+{"current_steps": 11180, "total_steps": 35625, "loss": 0.4418, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 3.882786742659289e-05, "epoch": 1.5691228070175438, "percentage": 31.38, "elapsed_time": "1:23:12", "remaining_time": "3:01:55"}
+{"current_steps": 11190, "total_steps": 35625, "loss": 0.5278, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 3.880948487374241e-05, "epoch": 1.5705263157894738, "percentage": 31.41, "elapsed_time": "1:23:21", "remaining_time": "3:02:02"}
+{"current_steps": 11200, "total_steps": 35625, "loss": 0.476, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 3.8791091569734625e-05, "epoch": 1.5719298245614035, "percentage": 31.44, "elapsed_time": "1:23:31", "remaining_time": "3:02:08"}
+{"current_steps": 11210, "total_steps": 35625, "loss": 0.581, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 3.8772687528889385e-05, "epoch": 1.5733333333333333, "percentage": 31.47, "elapsed_time": "1:23:41", "remaining_time": "3:02:15"}
+{"current_steps": 11220, "total_steps": 35625, "loss": 0.5076, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 3.875427276553485e-05, "epoch": 1.5747368421052632, "percentage": 31.49, "elapsed_time": "1:23:49", "remaining_time": "3:02:20"}
+{"current_steps": 11230, "total_steps": 35625, "loss": 0.5177, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 3.873584729400753e-05, "epoch": 1.576140350877193, "percentage": 31.52, "elapsed_time": "1:23:58", "remaining_time": "3:02:24"}
+{"current_steps": 11240, "total_steps": 35625, "loss": 0.5348, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 3.8717411128652304e-05, "epoch": 1.577543859649123, "percentage": 31.55, "elapsed_time": "1:24:07", "remaining_time": "3:02:30"}
+{"current_steps": 11250, "total_steps": 35625, "loss": 0.4699, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 3.869896428382236e-05, "epoch": 1.5789473684210527, "percentage": 31.58, "elapsed_time": "1:24:16", "remaining_time": "3:02:35"}
+{"current_steps": 11260, "total_steps": 35625, "loss": 0.5403, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 3.8680506773879184e-05, "epoch": 1.5803508771929824, "percentage": 31.61, "elapsed_time": "1:24:25", "remaining_time": "3:02:40"}
+{"current_steps": 11270, "total_steps": 35625, "loss": 0.4982, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 3.8662038613192596e-05, "epoch": 1.5817543859649121, "percentage": 31.64, "elapsed_time": "1:24:34", "remaining_time": "3:02:45"}
+{"current_steps": 11280, "total_steps": 35625, "loss": 0.5025, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 3.8643559816140685e-05, "epoch": 1.583157894736842, "percentage": 31.66, "elapsed_time": "1:24:43", "remaining_time": "3:02:51"}
+{"current_steps": 11290, "total_steps": 35625, "loss": 0.4716, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 3.862507039710982e-05, "epoch": 1.584561403508772, "percentage": 31.69, "elapsed_time": "1:24:51", "remaining_time": "3:02:54"}
+{"current_steps": 11300, "total_steps": 35625, "loss": 0.5378, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 3.860657037049466e-05, "epoch": 1.5859649122807018, "percentage": 31.72, "elapsed_time": "1:25:00", "remaining_time": "3:02:59"}
+{"current_steps": 11310, "total_steps": 35625, "loss": 0.5017, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 3.85880597506981e-05, "epoch": 1.5873684210526315, "percentage": 31.75, "elapsed_time": "1:25:08", "remaining_time": "3:03:02"}
+{"current_steps": 11320, "total_steps": 35625, "loss": 0.4612, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 3.856953855213131e-05, "epoch": 1.5887719298245613, "percentage": 31.78, "elapsed_time": "1:25:15", "remaining_time": "3:03:04"}
+{"current_steps": 11330, "total_steps": 35625, "loss": 0.5077, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 3.855100678921365e-05, "epoch": 1.5901754385964912, "percentage": 31.8, "elapsed_time": "1:25:26", "remaining_time": "3:03:13"}
+{"current_steps": 11340, "total_steps": 35625, "loss": 0.5643, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 3.8532464476372765e-05, "epoch": 1.5915789473684212, "percentage": 31.83, "elapsed_time": "1:25:35", "remaining_time": "3:03:17"}
+{"current_steps": 11350, "total_steps": 35625, "loss": 0.4939, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 3.851391162804445e-05, "epoch": 1.592982456140351, "percentage": 31.86, "elapsed_time": "1:25:44", "remaining_time": "3:03:23"}
+{"current_steps": 11360, "total_steps": 35625, "loss": 0.5191, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 3.849534825867275e-05, "epoch": 1.5943859649122807, "percentage": 31.89, "elapsed_time": "1:25:54", "remaining_time": "3:03:29"}
+{"current_steps": 11370, "total_steps": 35625, "loss": 0.5361, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 3.847677438270988e-05, "epoch": 1.5957894736842104, "percentage": 31.92, "elapsed_time": "1:26:02", "remaining_time": "3:03:32"}
+{"current_steps": 11380, "total_steps": 35625, "loss": 0.5005, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 3.845819001461625e-05, "epoch": 1.5971929824561404, "percentage": 31.94, "elapsed_time": "1:26:11", "remaining_time": "3:03:37"}
+{"current_steps": 11390, "total_steps": 35625, "loss": 0.491, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 3.8439595168860406e-05, "epoch": 1.5985964912280701, "percentage": 31.97, "elapsed_time": "1:26:20", "remaining_time": "3:03:43"}
+{"current_steps": 11400, "total_steps": 35625, "loss": 0.5636, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 3.842098985991909e-05, "epoch": 1.6, "percentage": 32.0, "elapsed_time": "1:26:28", "remaining_time": "3:03:45"}
+{"current_steps": 11410, "total_steps": 35625, "loss": 0.5482, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 3.840237410227717e-05, "epoch": 1.6014035087719298, "percentage": 32.03, "elapsed_time": "1:26:36", "remaining_time": "3:03:47"}
+{"current_steps": 11420, "total_steps": 35625, "loss": 0.4854, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 3.838374791042764e-05, "epoch": 1.6028070175438596, "percentage": 32.06, "elapsed_time": "1:26:43", "remaining_time": "3:03:49"}
+{"current_steps": 11430, "total_steps": 35625, "loss": 0.5535, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 3.8365111298871645e-05, "epoch": 1.6042105263157893, "percentage": 32.08, "elapsed_time": "1:26:52", "remaining_time": "3:03:54"}
+{"current_steps": 11440, "total_steps": 35625, "loss": 0.5493, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 3.834646428211841e-05, "epoch": 1.6056140350877193, "percentage": 32.11, "elapsed_time": "1:27:01", "remaining_time": "3:03:58"}
+{"current_steps": 11450, "total_steps": 35625, "loss": 0.5126, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 3.83278068746853e-05, "epoch": 1.6070175438596492, "percentage": 32.14, "elapsed_time": "1:27:10", "remaining_time": "3:04:03"}
+{"current_steps": 11460, "total_steps": 35625, "loss": 0.5692, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 3.830913909109772e-05, "epoch": 1.608421052631579, "percentage": 32.17, "elapsed_time": "1:27:22", "remaining_time": "3:04:14"}
+{"current_steps": 11470, "total_steps": 35625, "loss": 0.4367, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 3.8290460945889186e-05, "epoch": 1.6098245614035087, "percentage": 32.2, "elapsed_time": "1:27:33", "remaining_time": "3:04:22"}
+{"current_steps": 11480, "total_steps": 35625, "loss": 0.5275, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 3.827177245360129e-05, "epoch": 1.6112280701754385, "percentage": 32.22, "elapsed_time": "1:27:44", "remaining_time": "3:04:33"}
+{"current_steps": 11490, "total_steps": 35625, "loss": 0.5663, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 3.825307362878364e-05, "epoch": 1.6126315789473684, "percentage": 32.25, "elapsed_time": "1:27:53", "remaining_time": "3:04:37"}
+{"current_steps": 11500, "total_steps": 35625, "loss": 0.4986, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 3.823436448599393e-05, "epoch": 1.6140350877192984, "percentage": 32.28, "elapsed_time": "1:28:03", "remaining_time": "3:04:44"}
+{"current_steps": 11510, "total_steps": 35625, "loss": 0.5401, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 3.8215645039797874e-05, "epoch": 1.6154385964912281, "percentage": 32.31, "elapsed_time": "1:28:10", "remaining_time": "3:04:45"}
+{"current_steps": 11520, "total_steps": 35625, "loss": 0.534, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 3.8196915304769184e-05, "epoch": 1.6168421052631579, "percentage": 32.34, "elapsed_time": "1:28:22", "remaining_time": "3:04:55"}
+{"current_steps": 11530, "total_steps": 35625, "loss": 0.457, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 3.817817529548962e-05, "epoch": 1.6182456140350876, "percentage": 32.36, "elapsed_time": "1:28:31", "remaining_time": "3:04:59"}
+{"current_steps": 11540, "total_steps": 35625, "loss": 0.5023, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 3.815942502654889e-05, "epoch": 1.6196491228070176, "percentage": 32.39, "elapsed_time": "1:28:38", "remaining_time": "3:05:00"}
+{"current_steps": 11550, "total_steps": 35625, "loss": 0.4885, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 3.8140664512544746e-05, "epoch": 1.6210526315789475, "percentage": 32.42, "elapsed_time": "1:28:47", "remaining_time": "3:05:04"}
+{"current_steps": 11560, "total_steps": 35625, "loss": 0.5204, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 3.8121893768082896e-05, "epoch": 1.6224561403508773, "percentage": 32.45, "elapsed_time": "1:28:55", "remaining_time": "3:05:07"}
+{"current_steps": 11570, "total_steps": 35625, "loss": 0.4611, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 3.8103112807776986e-05, "epoch": 1.623859649122807, "percentage": 32.48, "elapsed_time": "1:29:05", "remaining_time": "3:05:14"}
+{"current_steps": 11580, "total_steps": 35625, "loss": 0.4999, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 3.8084321646248654e-05, "epoch": 1.6252631578947367, "percentage": 32.51, "elapsed_time": "1:29:13", "remaining_time": "3:05:16"}
+{"current_steps": 11590, "total_steps": 35625, "loss": 0.5241, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 3.806552029812747e-05, "epoch": 1.6266666666666667, "percentage": 32.53, "elapsed_time": "1:29:27", "remaining_time": "3:05:31"}
+{"current_steps": 11600, "total_steps": 35625, "loss": 0.5275, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 3.804670877805091e-05, "epoch": 1.6280701754385964, "percentage": 32.56, "elapsed_time": "1:29:35", "remaining_time": "3:05:33"}
+{"current_steps": 11610, "total_steps": 35625, "loss": 0.4517, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 3.802788710066439e-05, "epoch": 1.6294736842105264, "percentage": 32.59, "elapsed_time": "1:29:42", "remaining_time": "3:05:33"}
+{"current_steps": 11620, "total_steps": 35625, "loss": 0.4437, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 3.800905528062123e-05, "epoch": 1.6308771929824561, "percentage": 32.62, "elapsed_time": "1:29:52", "remaining_time": "3:05:40"}
+{"current_steps": 11630, "total_steps": 35625, "loss": 0.5334, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 3.7990213332582665e-05, "epoch": 1.6322807017543859, "percentage": 32.65, "elapsed_time": "1:29:59", "remaining_time": "3:05:39"}
+{"current_steps": 11640, "total_steps": 35625, "loss": 0.5915, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 3.7971361271217775e-05, "epoch": 1.6336842105263156, "percentage": 32.67, "elapsed_time": "1:30:08", "remaining_time": "3:05:44"}
+{"current_steps": 11650, "total_steps": 35625, "loss": 0.633, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 3.7952499111203544e-05, "epoch": 1.6350877192982456, "percentage": 32.7, "elapsed_time": "1:30:18", "remaining_time": "3:05:51"}
+{"current_steps": 11660, "total_steps": 35625, "loss": 0.523, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 3.793362686722483e-05, "epoch": 1.6364912280701756, "percentage": 32.73, "elapsed_time": "1:30:27", "remaining_time": "3:05:54"}
+{"current_steps": 11670, "total_steps": 35625, "loss": 0.5025, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 3.7914744553974284e-05, "epoch": 1.6378947368421053, "percentage": 32.76, "elapsed_time": "1:30:38", "remaining_time": "3:06:02"}
+{"current_steps": 11680, "total_steps": 35625, "loss": 0.5153, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 3.789585218615246e-05, "epoch": 1.639298245614035, "percentage": 32.79, "elapsed_time": "1:30:47", "remaining_time": "3:06:08"}
+{"current_steps": 11690, "total_steps": 35625, "loss": 0.5783, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 3.787694977846771e-05, "epoch": 1.6407017543859648, "percentage": 32.81, "elapsed_time": "1:31:00", "remaining_time": "3:06:19"}
+{"current_steps": 11700, "total_steps": 35625, "loss": 0.5333, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 3.78580373456362e-05, "epoch": 1.6421052631578947, "percentage": 32.84, "elapsed_time": "1:31:08", "remaining_time": "3:06:21"}
+{"current_steps": 11710, "total_steps": 35625, "loss": 0.574, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 3.783911490238191e-05, "epoch": 1.6435087719298247, "percentage": 32.87, "elapsed_time": "1:31:14", "remaining_time": "3:06:21"}
+{"current_steps": 11720, "total_steps": 35625, "loss": 0.5028, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 3.782018246343661e-05, "epoch": 1.6449122807017544, "percentage": 32.9, "elapsed_time": "1:31:23", "remaining_time": "3:06:24"}
+{"current_steps": 11730, "total_steps": 35625, "loss": 0.5425, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 3.780124004353987e-05, "epoch": 1.6463157894736842, "percentage": 32.93, "elapsed_time": "1:31:32", "remaining_time": "3:06:28"}
+{"current_steps": 11740, "total_steps": 35625, "loss": 0.4961, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 3.778228765743898e-05, "epoch": 1.647719298245614, "percentage": 32.95, "elapsed_time": "1:31:40", "remaining_time": "3:06:30"}
+{"current_steps": 11750, "total_steps": 35625, "loss": 0.5135, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 3.776332531988903e-05, "epoch": 1.6491228070175439, "percentage": 32.98, "elapsed_time": "1:31:51", "remaining_time": "3:06:39"}
+{"current_steps": 11760, "total_steps": 35625, "loss": 0.5917, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 3.774435304565288e-05, "epoch": 1.6505263157894738, "percentage": 33.01, "elapsed_time": "1:32:00", "remaining_time": "3:06:43"}
+{"current_steps": 11770, "total_steps": 35625, "loss": 0.6529, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 3.772537084950106e-05, "epoch": 1.6519298245614036, "percentage": 33.04, "elapsed_time": "1:32:11", "remaining_time": "3:06:51"}
+{"current_steps": 11780, "total_steps": 35625, "loss": 0.4853, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 3.770637874621189e-05, "epoch": 1.6533333333333333, "percentage": 33.07, "elapsed_time": "1:32:19", "remaining_time": "3:06:53"}
+{"current_steps": 11790, "total_steps": 35625, "loss": 0.5509, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 3.7687376750571347e-05, "epoch": 1.654736842105263, "percentage": 33.09, "elapsed_time": "1:32:29", "remaining_time": "3:06:58"}
+{"current_steps": 11800, "total_steps": 35625, "loss": 0.5083, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 3.7668364877373154e-05, "epoch": 1.656140350877193, "percentage": 33.12, "elapsed_time": "1:32:36", "remaining_time": "3:06:59"}
+{"current_steps": 11810, "total_steps": 35625, "loss": 0.5239, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 3.764934314141869e-05, "epoch": 1.6575438596491228, "percentage": 33.15, "elapsed_time": "1:32:44", "remaining_time": "3:07:01"}
+{"current_steps": 11820, "total_steps": 35625, "loss": 0.5295, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 3.763031155751705e-05, "epoch": 1.6589473684210527, "percentage": 33.18, "elapsed_time": "1:32:53", "remaining_time": "3:07:05"}
+{"current_steps": 11830, "total_steps": 35625, "loss": 0.3987, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 3.7611270140484956e-05, "epoch": 1.6603508771929825, "percentage": 33.21, "elapsed_time": "1:33:04", "remaining_time": "3:07:11"}
+{"current_steps": 11840, "total_steps": 35625, "loss": 0.5236, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 3.759221890514681e-05, "epoch": 1.6617543859649122, "percentage": 33.24, "elapsed_time": "1:33:13", "remaining_time": "3:07:17"}
+{"current_steps": 11850, "total_steps": 35625, "loss": 0.4783, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 3.757315786633465e-05, "epoch": 1.663157894736842, "percentage": 33.26, "elapsed_time": "1:33:24", "remaining_time": "3:07:23"}
+{"current_steps": 11860, "total_steps": 35625, "loss": 0.6304, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 3.7554087038888155e-05, "epoch": 1.664561403508772, "percentage": 33.29, "elapsed_time": "1:33:31", "remaining_time": "3:07:24"}
+{"current_steps": 11870, "total_steps": 35625, "loss": 0.4951, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 3.753500643765461e-05, "epoch": 1.6659649122807019, "percentage": 33.32, "elapsed_time": "1:33:40", "remaining_time": "3:07:27"}
+{"current_steps": 11880, "total_steps": 35625, "loss": 0.5195, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 3.751591607748891e-05, "epoch": 1.6673684210526316, "percentage": 33.35, "elapsed_time": "1:33:49", "remaining_time": "3:07:32"}
+{"current_steps": 11890, "total_steps": 35625, "loss": 0.6116, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 3.749681597325357e-05, "epoch": 1.6687719298245614, "percentage": 33.38, "elapsed_time": "1:33:58", "remaining_time": "3:07:34"}
+{"current_steps": 11900, "total_steps": 35625, "loss": 0.5038, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 3.7477706139818683e-05, "epoch": 1.670175438596491, "percentage": 33.4, "elapsed_time": "1:34:06", "remaining_time": "3:07:37"}
+{"current_steps": 11910, "total_steps": 35625, "loss": 0.5671, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 3.745858659206188e-05, "epoch": 1.671578947368421, "percentage": 33.43, "elapsed_time": "1:34:15", "remaining_time": "3:07:41"}
+{"current_steps": 11920, "total_steps": 35625, "loss": 0.5559, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 3.743945734486841e-05, "epoch": 1.672982456140351, "percentage": 33.46, "elapsed_time": "1:34:23", "remaining_time": "3:07:42"}
+{"current_steps": 11930, "total_steps": 35625, "loss": 0.5069, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 3.742031841313103e-05, "epoch": 1.6743859649122808, "percentage": 33.49, "elapsed_time": "1:34:33", "remaining_time": "3:07:48"}
+{"current_steps": 11940, "total_steps": 35625, "loss": 0.5431, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 3.7401169811750066e-05, "epoch": 1.6757894736842105, "percentage": 33.52, "elapsed_time": "1:34:40", "remaining_time": "3:07:48"}
+{"current_steps": 11950, "total_steps": 35625, "loss": 0.5636, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 3.7382011555633365e-05, "epoch": 1.6771929824561402, "percentage": 33.54, "elapsed_time": "1:34:47", "remaining_time": "3:07:48"}
+{"current_steps": 11960, "total_steps": 35625, "loss": 0.4871, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 3.736284365969627e-05, "epoch": 1.6785964912280702, "percentage": 33.57, "elapsed_time": "1:34:56", "remaining_time": "3:07:50"}
+{"current_steps": 11970, "total_steps": 35625, "loss": 0.6245, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 3.7343666138861646e-05, "epoch": 1.6800000000000002, "percentage": 33.6, "elapsed_time": "1:35:07", "remaining_time": "3:07:58"}
+{"current_steps": 11980, "total_steps": 35625, "loss": 0.5126, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 3.7324479008059865e-05, "epoch": 1.68140350877193, "percentage": 33.63, "elapsed_time": "1:35:15", "remaining_time": "3:08:00"}
+{"current_steps": 11990, "total_steps": 35625, "loss": 0.5669, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 3.7305282282228756e-05, "epoch": 1.6828070175438596, "percentage": 33.66, "elapsed_time": "1:35:25", "remaining_time": "3:08:05"}
+{"current_steps": 12000, "total_steps": 35625, "loss": 0.4796, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 3.728607597631363e-05, "epoch": 1.6842105263157894, "percentage": 33.68, "elapsed_time": "1:35:35", "remaining_time": "3:08:11"}
+{"current_steps": 12000, "total_steps": 35625, "loss": null, "eval_loss": 0.6500447392463684, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": null, "epoch": 1.6842105263157894, "percentage": 33.68, "elapsed_time": "1:35:35", "remaining_time": "3:08:11"}
+{"current_steps": 12000, "total_steps": 35625, "loss": null, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": null, "epoch": 1.6842105263157894, "percentage": 33.68, "elapsed_time": "1:35:35", "remaining_time": "3:08:11"}
+{"current_steps": 375, "total_steps": 375, "loss": null, "eval_loss": 0.6415141820907593, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": null, "epoch": 1.6842105263157894, "percentage": 100.0, "elapsed_time": "1:39:35", "remaining_time": "0:00:00"}
diff --git a/llama2_13b_peft/news_commentary_it/trainer_state.json b/llama2_13b_peft/news_commentary_it/trainer_state.json
new file mode 100644
index 0000000000000000000000000000000000000000..f695c95aa1f39734102bdf3ab2894d07b0f04078
--- /dev/null
+++ b/llama2_13b_peft/news_commentary_it/trainer_state.json
@@ -0,0 +1,8478 @@
+{
+ "best_metric": 0.6415141820907593,
+ "best_model_checkpoint": "ckpt/llama2_13b_fuze27_no_sys/news_commentary_it_no_sys/checkpoint-6000",
+ "epoch": 1.6842105263157894,
+ "eval_steps": 2000,
+ "global_step": 12000,
+ "is_hyper_param_search": false,
+ "is_local_process_zero": true,
+ "is_world_process_zero": true,
+ "log_history": [
+ {
+ "epoch": 0.0014035087719298245,
+ "grad_norm": 0.6813449859619141,
+ "learning_rate": 2.5e-05,
+ "loss": 1.3423,
+ "step": 10
+ },
+ {
+ "epoch": 0.002807017543859649,
+ "grad_norm": 0.9470943212509155,
+ "learning_rate": 5e-05,
+ "loss": 1.3855,
+ "step": 20
+ },
+ {
+ "epoch": 0.004210526315789474,
+ "grad_norm": 0.8929744958877563,
+ "learning_rate": 4.999999026832157e-05,
+ "loss": 0.9621,
+ "step": 30
+ },
+ {
+ "epoch": 0.005614035087719298,
+ "grad_norm": 1.383805274963379,
+ "learning_rate": 4.9999961073293845e-05,
+ "loss": 0.8217,
+ "step": 40
+ },
+ {
+ "epoch": 0.007017543859649123,
+ "grad_norm": 0.7758613228797913,
+ "learning_rate": 4.9999912414939555e-05,
+ "loss": 0.7743,
+ "step": 50
+ },
+ {
+ "epoch": 0.008421052631578947,
+ "grad_norm": 0.38530462980270386,
+ "learning_rate": 4.9999844293296585e-05,
+ "loss": 0.7671,
+ "step": 60
+ },
+ {
+ "epoch": 0.009824561403508772,
+ "grad_norm": 0.9287435412406921,
+ "learning_rate": 4.999975670841798e-05,
+ "loss": 0.7657,
+ "step": 70
+ },
+ {
+ "epoch": 0.011228070175438596,
+ "grad_norm": 0.5709918737411499,
+ "learning_rate": 4.9999649660371906e-05,
+ "loss": 0.6544,
+ "step": 80
+ },
+ {
+ "epoch": 0.01263157894736842,
+ "grad_norm": 0.6181680560112,
+ "learning_rate": 4.9999523149241714e-05,
+ "loss": 0.7627,
+ "step": 90
+ },
+ {
+ "epoch": 0.014035087719298246,
+ "grad_norm": 0.8074678182601929,
+ "learning_rate": 4.99993771751259e-05,
+ "loss": 0.7428,
+ "step": 100
+ },
+ {
+ "epoch": 0.015438596491228071,
+ "grad_norm": 0.7091221809387207,
+ "learning_rate": 4.999921173813812e-05,
+ "loss": 0.7024,
+ "step": 110
+ },
+ {
+ "epoch": 0.016842105263157894,
+ "grad_norm": 2.1647095680236816,
+ "learning_rate": 4.999902683840715e-05,
+ "loss": 0.8205,
+ "step": 120
+ },
+ {
+ "epoch": 0.018245614035087718,
+ "grad_norm": 1.178070068359375,
+ "learning_rate": 4.9998822476076955e-05,
+ "loss": 0.7359,
+ "step": 130
+ },
+ {
+ "epoch": 0.019649122807017545,
+ "grad_norm": 1.0926941633224487,
+ "learning_rate": 4.999859865130664e-05,
+ "loss": 0.6837,
+ "step": 140
+ },
+ {
+ "epoch": 0.021052631578947368,
+ "grad_norm": 1.5175189971923828,
+ "learning_rate": 4.9998355364270445e-05,
+ "loss": 0.7091,
+ "step": 150
+ },
+ {
+ "epoch": 0.02245614035087719,
+ "grad_norm": 0.9353613257408142,
+ "learning_rate": 4.999809261515779e-05,
+ "loss": 0.7608,
+ "step": 160
+ },
+ {
+ "epoch": 0.023859649122807018,
+ "grad_norm": 0.4437258839607239,
+ "learning_rate": 4.9997810404173234e-05,
+ "loss": 0.7725,
+ "step": 170
+ },
+ {
+ "epoch": 0.02526315789473684,
+ "grad_norm": 0.4320019781589508,
+ "learning_rate": 4.999750873153648e-05,
+ "loss": 0.7884,
+ "step": 180
+ },
+ {
+ "epoch": 0.02666666666666667,
+ "grad_norm": 0.8100196123123169,
+ "learning_rate": 4.9997187597482405e-05,
+ "loss": 0.7266,
+ "step": 190
+ },
+ {
+ "epoch": 0.028070175438596492,
+ "grad_norm": 1.1367573738098145,
+ "learning_rate": 4.9996847002261006e-05,
+ "loss": 0.6825,
+ "step": 200
+ },
+ {
+ "epoch": 0.029473684210526315,
+ "grad_norm": 0.9733144640922546,
+ "learning_rate": 4.999648694613746e-05,
+ "loss": 0.6162,
+ "step": 210
+ },
+ {
+ "epoch": 0.030877192982456142,
+ "grad_norm": 0.7170027494430542,
+ "learning_rate": 4.9996107429392083e-05,
+ "loss": 0.6696,
+ "step": 220
+ },
+ {
+ "epoch": 0.032280701754385965,
+ "grad_norm": 0.939182698726654,
+ "learning_rate": 4.9995708452320325e-05,
+ "loss": 0.7512,
+ "step": 230
+ },
+ {
+ "epoch": 0.03368421052631579,
+ "grad_norm": 0.7647657990455627,
+ "learning_rate": 4.999529001523282e-05,
+ "loss": 0.7137,
+ "step": 240
+ },
+ {
+ "epoch": 0.03508771929824561,
+ "grad_norm": 0.9428808093070984,
+ "learning_rate": 4.9994852118455335e-05,
+ "loss": 0.7676,
+ "step": 250
+ },
+ {
+ "epoch": 0.036491228070175435,
+ "grad_norm": 0.3808974325656891,
+ "learning_rate": 4.9994394762328786e-05,
+ "loss": 0.7208,
+ "step": 260
+ },
+ {
+ "epoch": 0.037894736842105266,
+ "grad_norm": 1.0278472900390625,
+ "learning_rate": 4.999391794720923e-05,
+ "loss": 0.7029,
+ "step": 270
+ },
+ {
+ "epoch": 0.03929824561403509,
+ "grad_norm": 0.8878808617591858,
+ "learning_rate": 4.9993421673467906e-05,
+ "loss": 0.6751,
+ "step": 280
+ },
+ {
+ "epoch": 0.04070175438596491,
+ "grad_norm": 0.5619615316390991,
+ "learning_rate": 4.9992905941491155e-05,
+ "loss": 0.7652,
+ "step": 290
+ },
+ {
+ "epoch": 0.042105263157894736,
+ "grad_norm": 1.1087744235992432,
+ "learning_rate": 4.9992370751680514e-05,
+ "loss": 0.7609,
+ "step": 300
+ },
+ {
+ "epoch": 0.04350877192982456,
+ "grad_norm": 0.7816822528839111,
+ "learning_rate": 4.999181610445263e-05,
+ "loss": 0.678,
+ "step": 310
+ },
+ {
+ "epoch": 0.04491228070175438,
+ "grad_norm": 1.0437147617340088,
+ "learning_rate": 4.9991242000239316e-05,
+ "loss": 0.7089,
+ "step": 320
+ },
+ {
+ "epoch": 0.04631578947368421,
+ "grad_norm": 0.7266655564308167,
+ "learning_rate": 4.9990648439487544e-05,
+ "loss": 0.7034,
+ "step": 330
+ },
+ {
+ "epoch": 0.047719298245614036,
+ "grad_norm": 0.8695891499519348,
+ "learning_rate": 4.999003542265941e-05,
+ "loss": 0.6789,
+ "step": 340
+ },
+ {
+ "epoch": 0.04912280701754386,
+ "grad_norm": 1.2530779838562012,
+ "learning_rate": 4.998940295023218e-05,
+ "loss": 0.6895,
+ "step": 350
+ },
+ {
+ "epoch": 0.05052631578947368,
+ "grad_norm": 0.9562914371490479,
+ "learning_rate": 4.9988751022698244e-05,
+ "loss": 0.7472,
+ "step": 360
+ },
+ {
+ "epoch": 0.051929824561403506,
+ "grad_norm": 1.5020138025283813,
+ "learning_rate": 4.9988079640565155e-05,
+ "loss": 0.7637,
+ "step": 370
+ },
+ {
+ "epoch": 0.05333333333333334,
+ "grad_norm": 1.3555861711502075,
+ "learning_rate": 4.998738880435561e-05,
+ "loss": 0.8042,
+ "step": 380
+ },
+ {
+ "epoch": 0.05473684210526316,
+ "grad_norm": 1.4689439535140991,
+ "learning_rate": 4.9986678514607434e-05,
+ "loss": 0.7878,
+ "step": 390
+ },
+ {
+ "epoch": 0.056140350877192984,
+ "grad_norm": 1.1399718523025513,
+ "learning_rate": 4.998594877187362e-05,
+ "loss": 0.6831,
+ "step": 400
+ },
+ {
+ "epoch": 0.05754385964912281,
+ "grad_norm": 0.9988260269165039,
+ "learning_rate": 4.998519957672232e-05,
+ "loss": 0.7905,
+ "step": 410
+ },
+ {
+ "epoch": 0.05894736842105263,
+ "grad_norm": 1.3424835205078125,
+ "learning_rate": 4.998443092973678e-05,
+ "loss": 0.6195,
+ "step": 420
+ },
+ {
+ "epoch": 0.060350877192982454,
+ "grad_norm": 1.3029276132583618,
+ "learning_rate": 4.998364283151542e-05,
+ "loss": 0.7603,
+ "step": 430
+ },
+ {
+ "epoch": 0.061754385964912284,
+ "grad_norm": 1.0647430419921875,
+ "learning_rate": 4.9982835282671816e-05,
+ "loss": 0.7099,
+ "step": 440
+ },
+ {
+ "epoch": 0.06315789473684211,
+ "grad_norm": 0.4545954763889313,
+ "learning_rate": 4.998200828383466e-05,
+ "loss": 0.6307,
+ "step": 450
+ },
+ {
+ "epoch": 0.06456140350877193,
+ "grad_norm": 0.9822194576263428,
+ "learning_rate": 4.99811618356478e-05,
+ "loss": 0.7084,
+ "step": 460
+ },
+ {
+ "epoch": 0.06596491228070175,
+ "grad_norm": 1.0566892623901367,
+ "learning_rate": 4.998029593877025e-05,
+ "loss": 0.6897,
+ "step": 470
+ },
+ {
+ "epoch": 0.06736842105263158,
+ "grad_norm": 0.9908930063247681,
+ "learning_rate": 4.9979410593876096e-05,
+ "loss": 0.7054,
+ "step": 480
+ },
+ {
+ "epoch": 0.0687719298245614,
+ "grad_norm": 0.5955024361610413,
+ "learning_rate": 4.997850580165464e-05,
+ "loss": 0.645,
+ "step": 490
+ },
+ {
+ "epoch": 0.07017543859649122,
+ "grad_norm": 1.570892095565796,
+ "learning_rate": 4.997758156281029e-05,
+ "loss": 0.6455,
+ "step": 500
+ },
+ {
+ "epoch": 0.07157894736842105,
+ "grad_norm": 0.9024527072906494,
+ "learning_rate": 4.997663787806259e-05,
+ "loss": 0.6797,
+ "step": 510
+ },
+ {
+ "epoch": 0.07298245614035087,
+ "grad_norm": 0.6280427575111389,
+ "learning_rate": 4.997567474814623e-05,
+ "loss": 0.7582,
+ "step": 520
+ },
+ {
+ "epoch": 0.07438596491228071,
+ "grad_norm": 1.0706899166107178,
+ "learning_rate": 4.997469217381105e-05,
+ "loss": 0.667,
+ "step": 530
+ },
+ {
+ "epoch": 0.07578947368421053,
+ "grad_norm": 0.8091099262237549,
+ "learning_rate": 4.997369015582201e-05,
+ "loss": 0.6878,
+ "step": 540
+ },
+ {
+ "epoch": 0.07719298245614035,
+ "grad_norm": 0.900131106376648,
+ "learning_rate": 4.9972668694959216e-05,
+ "loss": 0.7693,
+ "step": 550
+ },
+ {
+ "epoch": 0.07859649122807018,
+ "grad_norm": 0.791890025138855,
+ "learning_rate": 4.9971627792017915e-05,
+ "loss": 0.561,
+ "step": 560
+ },
+ {
+ "epoch": 0.08,
+ "grad_norm": 1.3132946491241455,
+ "learning_rate": 4.997056744780848e-05,
+ "loss": 0.6739,
+ "step": 570
+ },
+ {
+ "epoch": 0.08140350877192983,
+ "grad_norm": 0.8182291984558105,
+ "learning_rate": 4.9969487663156434e-05,
+ "loss": 0.6561,
+ "step": 580
+ },
+ {
+ "epoch": 0.08280701754385965,
+ "grad_norm": 1.1820317506790161,
+ "learning_rate": 4.9968388438902415e-05,
+ "loss": 0.6056,
+ "step": 590
+ },
+ {
+ "epoch": 0.08421052631578947,
+ "grad_norm": 0.6508825421333313,
+ "learning_rate": 4.9967269775902204e-05,
+ "loss": 0.6962,
+ "step": 600
+ },
+ {
+ "epoch": 0.0856140350877193,
+ "grad_norm": 0.8885963559150696,
+ "learning_rate": 4.996613167502674e-05,
+ "loss": 0.6174,
+ "step": 610
+ },
+ {
+ "epoch": 0.08701754385964912,
+ "grad_norm": 0.8769521713256836,
+ "learning_rate": 4.996497413716205e-05,
+ "loss": 0.6806,
+ "step": 620
+ },
+ {
+ "epoch": 0.08842105263157894,
+ "grad_norm": 1.1168580055236816,
+ "learning_rate": 4.996379716320933e-05,
+ "loss": 0.7618,
+ "step": 630
+ },
+ {
+ "epoch": 0.08982456140350877,
+ "grad_norm": 0.6629518270492554,
+ "learning_rate": 4.996260075408489e-05,
+ "loss": 0.6796,
+ "step": 640
+ },
+ {
+ "epoch": 0.0912280701754386,
+ "grad_norm": 0.5513269901275635,
+ "learning_rate": 4.996138491072018e-05,
+ "loss": 0.6249,
+ "step": 650
+ },
+ {
+ "epoch": 0.09263157894736843,
+ "grad_norm": 0.8878002166748047,
+ "learning_rate": 4.996014963406177e-05,
+ "loss": 0.6905,
+ "step": 660
+ },
+ {
+ "epoch": 0.09403508771929825,
+ "grad_norm": 1.407973289489746,
+ "learning_rate": 4.9958894925071364e-05,
+ "loss": 0.7082,
+ "step": 670
+ },
+ {
+ "epoch": 0.09543859649122807,
+ "grad_norm": 2.0107500553131104,
+ "learning_rate": 4.995762078472581e-05,
+ "loss": 0.6751,
+ "step": 680
+ },
+ {
+ "epoch": 0.0968421052631579,
+ "grad_norm": 0.7563285827636719,
+ "learning_rate": 4.995632721401705e-05,
+ "loss": 0.6223,
+ "step": 690
+ },
+ {
+ "epoch": 0.09824561403508772,
+ "grad_norm": 0.7729387879371643,
+ "learning_rate": 4.995501421395219e-05,
+ "loss": 0.622,
+ "step": 700
+ },
+ {
+ "epoch": 0.09964912280701754,
+ "grad_norm": 0.9992890954017639,
+ "learning_rate": 4.995368178555343e-05,
+ "loss": 0.7565,
+ "step": 710
+ },
+ {
+ "epoch": 0.10105263157894737,
+ "grad_norm": 1.0641027688980103,
+ "learning_rate": 4.9952329929858125e-05,
+ "loss": 0.7486,
+ "step": 720
+ },
+ {
+ "epoch": 0.10245614035087719,
+ "grad_norm": 0.8268628716468811,
+ "learning_rate": 4.995095864791873e-05,
+ "loss": 0.6825,
+ "step": 730
+ },
+ {
+ "epoch": 0.10385964912280701,
+ "grad_norm": 0.7123477458953857,
+ "learning_rate": 4.994956794080285e-05,
+ "loss": 0.7342,
+ "step": 740
+ },
+ {
+ "epoch": 0.10526315789473684,
+ "grad_norm": 1.0346596240997314,
+ "learning_rate": 4.994815780959318e-05,
+ "loss": 0.6289,
+ "step": 750
+ },
+ {
+ "epoch": 0.10666666666666667,
+ "grad_norm": 0.7578685283660889,
+ "learning_rate": 4.994672825538757e-05,
+ "loss": 0.5675,
+ "step": 760
+ },
+ {
+ "epoch": 0.1080701754385965,
+ "grad_norm": 1.1263622045516968,
+ "learning_rate": 4.994527927929897e-05,
+ "loss": 0.7527,
+ "step": 770
+ },
+ {
+ "epoch": 0.10947368421052632,
+ "grad_norm": 0.8590745329856873,
+ "learning_rate": 4.9943810882455454e-05,
+ "loss": 0.6421,
+ "step": 780
+ },
+ {
+ "epoch": 0.11087719298245614,
+ "grad_norm": 0.7870830297470093,
+ "learning_rate": 4.994232306600023e-05,
+ "loss": 0.7016,
+ "step": 790
+ },
+ {
+ "epoch": 0.11228070175438597,
+ "grad_norm": 0.9499567747116089,
+ "learning_rate": 4.99408158310916e-05,
+ "loss": 0.6911,
+ "step": 800
+ },
+ {
+ "epoch": 0.11368421052631579,
+ "grad_norm": 1.1604363918304443,
+ "learning_rate": 4.9939289178903016e-05,
+ "loss": 0.697,
+ "step": 810
+ },
+ {
+ "epoch": 0.11508771929824561,
+ "grad_norm": 0.7308230400085449,
+ "learning_rate": 4.993774311062301e-05,
+ "loss": 0.5691,
+ "step": 820
+ },
+ {
+ "epoch": 0.11649122807017544,
+ "grad_norm": 1.0032395124435425,
+ "learning_rate": 4.993617762745526e-05,
+ "loss": 0.7744,
+ "step": 830
+ },
+ {
+ "epoch": 0.11789473684210526,
+ "grad_norm": 1.0617241859436035,
+ "learning_rate": 4.993459273061855e-05,
+ "loss": 0.7652,
+ "step": 840
+ },
+ {
+ "epoch": 0.11929824561403508,
+ "grad_norm": 1.207223653793335,
+ "learning_rate": 4.993298842134677e-05,
+ "loss": 0.6843,
+ "step": 850
+ },
+ {
+ "epoch": 0.12070175438596491,
+ "grad_norm": 0.6737737059593201,
+ "learning_rate": 4.993136470088894e-05,
+ "loss": 0.7147,
+ "step": 860
+ },
+ {
+ "epoch": 0.12210526315789473,
+ "grad_norm": 1.3904882669448853,
+ "learning_rate": 4.992972157050916e-05,
+ "loss": 0.641,
+ "step": 870
+ },
+ {
+ "epoch": 0.12350877192982457,
+ "grad_norm": 0.8821682929992676,
+ "learning_rate": 4.992805903148669e-05,
+ "loss": 0.6212,
+ "step": 880
+ },
+ {
+ "epoch": 0.12491228070175439,
+ "grad_norm": 1.214309811592102,
+ "learning_rate": 4.992637708511586e-05,
+ "loss": 0.6817,
+ "step": 890
+ },
+ {
+ "epoch": 0.12631578947368421,
+ "grad_norm": 1.2954894304275513,
+ "learning_rate": 4.9924675732706123e-05,
+ "loss": 0.7072,
+ "step": 900
+ },
+ {
+ "epoch": 0.12771929824561404,
+ "grad_norm": 0.8437069058418274,
+ "learning_rate": 4.992295497558204e-05,
+ "loss": 0.6221,
+ "step": 910
+ },
+ {
+ "epoch": 0.12912280701754386,
+ "grad_norm": 0.6401008367538452,
+ "learning_rate": 4.992121481508328e-05,
+ "loss": 0.6162,
+ "step": 920
+ },
+ {
+ "epoch": 0.13052631578947368,
+ "grad_norm": 1.1894147396087646,
+ "learning_rate": 4.9919455252564624e-05,
+ "loss": 0.7548,
+ "step": 930
+ },
+ {
+ "epoch": 0.1319298245614035,
+ "grad_norm": 0.9592342376708984,
+ "learning_rate": 4.991767628939594e-05,
+ "loss": 0.6377,
+ "step": 940
+ },
+ {
+ "epoch": 0.13333333333333333,
+ "grad_norm": 0.6419144868850708,
+ "learning_rate": 4.991587792696223e-05,
+ "loss": 0.6971,
+ "step": 950
+ },
+ {
+ "epoch": 0.13473684210526315,
+ "grad_norm": 1.0908111333847046,
+ "learning_rate": 4.991406016666356e-05,
+ "loss": 0.7929,
+ "step": 960
+ },
+ {
+ "epoch": 0.13614035087719298,
+ "grad_norm": 1.231597661972046,
+ "learning_rate": 4.9912223009915126e-05,
+ "loss": 0.7556,
+ "step": 970
+ },
+ {
+ "epoch": 0.1375438596491228,
+ "grad_norm": 0.7628648281097412,
+ "learning_rate": 4.991036645814722e-05,
+ "loss": 0.5883,
+ "step": 980
+ },
+ {
+ "epoch": 0.13894736842105262,
+ "grad_norm": 0.766953706741333,
+ "learning_rate": 4.9908490512805236e-05,
+ "loss": 0.6362,
+ "step": 990
+ },
+ {
+ "epoch": 0.14035087719298245,
+ "grad_norm": 1.070429801940918,
+ "learning_rate": 4.990659517534966e-05,
+ "loss": 0.7057,
+ "step": 1000
+ },
+ {
+ "epoch": 0.14175438596491227,
+ "grad_norm": 0.8499042391777039,
+ "learning_rate": 4.990468044725606e-05,
+ "loss": 0.6051,
+ "step": 1010
+ },
+ {
+ "epoch": 0.1431578947368421,
+ "grad_norm": 1.1411361694335938,
+ "learning_rate": 4.990274633001514e-05,
+ "loss": 0.7434,
+ "step": 1020
+ },
+ {
+ "epoch": 0.14456140350877192,
+ "grad_norm": 1.3025455474853516,
+ "learning_rate": 4.990079282513266e-05,
+ "loss": 0.6681,
+ "step": 1030
+ },
+ {
+ "epoch": 0.14596491228070174,
+ "grad_norm": 0.9307923316955566,
+ "learning_rate": 4.9898819934129506e-05,
+ "loss": 0.6655,
+ "step": 1040
+ },
+ {
+ "epoch": 0.14736842105263157,
+ "grad_norm": 0.6463920474052429,
+ "learning_rate": 4.989682765854163e-05,
+ "loss": 0.7529,
+ "step": 1050
+ },
+ {
+ "epoch": 0.14877192982456142,
+ "grad_norm": 1.216407060623169,
+ "learning_rate": 4.989481599992009e-05,
+ "loss": 0.6249,
+ "step": 1060
+ },
+ {
+ "epoch": 0.15017543859649124,
+ "grad_norm": 0.747074544429779,
+ "learning_rate": 4.989278495983103e-05,
+ "loss": 0.6437,
+ "step": 1070
+ },
+ {
+ "epoch": 0.15157894736842106,
+ "grad_norm": 0.8777433633804321,
+ "learning_rate": 4.989073453985569e-05,
+ "loss": 0.6206,
+ "step": 1080
+ },
+ {
+ "epoch": 0.1529824561403509,
+ "grad_norm": 0.8588824272155762,
+ "learning_rate": 4.988866474159037e-05,
+ "loss": 0.6141,
+ "step": 1090
+ },
+ {
+ "epoch": 0.1543859649122807,
+ "grad_norm": 0.6369594931602478,
+ "learning_rate": 4.988657556664652e-05,
+ "loss": 0.6653,
+ "step": 1100
+ },
+ {
+ "epoch": 0.15578947368421053,
+ "grad_norm": 0.7276690006256104,
+ "learning_rate": 4.98844670166506e-05,
+ "loss": 0.6503,
+ "step": 1110
+ },
+ {
+ "epoch": 0.15719298245614036,
+ "grad_norm": 0.6937339305877686,
+ "learning_rate": 4.98823390932442e-05,
+ "loss": 0.6298,
+ "step": 1120
+ },
+ {
+ "epoch": 0.15859649122807018,
+ "grad_norm": 1.4779495000839233,
+ "learning_rate": 4.988019179808398e-05,
+ "loss": 0.6889,
+ "step": 1130
+ },
+ {
+ "epoch": 0.16,
+ "grad_norm": 1.4205069541931152,
+ "learning_rate": 4.987802513284169e-05,
+ "loss": 0.7086,
+ "step": 1140
+ },
+ {
+ "epoch": 0.16140350877192983,
+ "grad_norm": 1.5097942352294922,
+ "learning_rate": 4.9875839099204134e-05,
+ "loss": 0.6727,
+ "step": 1150
+ },
+ {
+ "epoch": 0.16280701754385965,
+ "grad_norm": 0.8267427086830139,
+ "learning_rate": 4.987363369887324e-05,
+ "loss": 0.6993,
+ "step": 1160
+ },
+ {
+ "epoch": 0.16421052631578947,
+ "grad_norm": 1.1303791999816895,
+ "learning_rate": 4.987140893356597e-05,
+ "loss": 0.5671,
+ "step": 1170
+ },
+ {
+ "epoch": 0.1656140350877193,
+ "grad_norm": 0.9507080316543579,
+ "learning_rate": 4.986916480501438e-05,
+ "loss": 0.6929,
+ "step": 1180
+ },
+ {
+ "epoch": 0.16701754385964912,
+ "grad_norm": 1.0298510789871216,
+ "learning_rate": 4.986690131496561e-05,
+ "loss": 0.5368,
+ "step": 1190
+ },
+ {
+ "epoch": 0.16842105263157894,
+ "grad_norm": 1.0742335319519043,
+ "learning_rate": 4.986461846518186e-05,
+ "loss": 0.6473,
+ "step": 1200
+ },
+ {
+ "epoch": 0.16982456140350877,
+ "grad_norm": 1.3724429607391357,
+ "learning_rate": 4.986231625744041e-05,
+ "loss": 0.6698,
+ "step": 1210
+ },
+ {
+ "epoch": 0.1712280701754386,
+ "grad_norm": 0.7210483551025391,
+ "learning_rate": 4.985999469353359e-05,
+ "loss": 0.6747,
+ "step": 1220
+ },
+ {
+ "epoch": 0.1726315789473684,
+ "grad_norm": 0.8128493428230286,
+ "learning_rate": 4.9857653775268853e-05,
+ "loss": 0.6509,
+ "step": 1230
+ },
+ {
+ "epoch": 0.17403508771929824,
+ "grad_norm": 0.9664400815963745,
+ "learning_rate": 4.985529350446865e-05,
+ "loss": 0.6895,
+ "step": 1240
+ },
+ {
+ "epoch": 0.17543859649122806,
+ "grad_norm": 1.0563639402389526,
+ "learning_rate": 4.985291388297055e-05,
+ "loss": 0.5882,
+ "step": 1250
+ },
+ {
+ "epoch": 0.17684210526315788,
+ "grad_norm": 0.7978933453559875,
+ "learning_rate": 4.985051491262716e-05,
+ "loss": 0.6688,
+ "step": 1260
+ },
+ {
+ "epoch": 0.1782456140350877,
+ "grad_norm": 1.0037199258804321,
+ "learning_rate": 4.984809659530617e-05,
+ "loss": 0.6135,
+ "step": 1270
+ },
+ {
+ "epoch": 0.17964912280701753,
+ "grad_norm": 1.0351414680480957,
+ "learning_rate": 4.9845658932890315e-05,
+ "loss": 0.6849,
+ "step": 1280
+ },
+ {
+ "epoch": 0.18105263157894738,
+ "grad_norm": 0.9015732407569885,
+ "learning_rate": 4.9843201927277407e-05,
+ "loss": 0.6036,
+ "step": 1290
+ },
+ {
+ "epoch": 0.1824561403508772,
+ "grad_norm": 1.1445683240890503,
+ "learning_rate": 4.984072558038031e-05,
+ "loss": 0.7348,
+ "step": 1300
+ },
+ {
+ "epoch": 0.18385964912280703,
+ "grad_norm": 1.2019379138946533,
+ "learning_rate": 4.983822989412693e-05,
+ "loss": 0.7679,
+ "step": 1310
+ },
+ {
+ "epoch": 0.18526315789473685,
+ "grad_norm": 0.6560442447662354,
+ "learning_rate": 4.983571487046026e-05,
+ "loss": 0.7083,
+ "step": 1320
+ },
+ {
+ "epoch": 0.18666666666666668,
+ "grad_norm": 0.8415977954864502,
+ "learning_rate": 4.9833180511338314e-05,
+ "loss": 0.6417,
+ "step": 1330
+ },
+ {
+ "epoch": 0.1880701754385965,
+ "grad_norm": 0.8725243210792542,
+ "learning_rate": 4.983062681873421e-05,
+ "loss": 0.6817,
+ "step": 1340
+ },
+ {
+ "epoch": 0.18947368421052632,
+ "grad_norm": 0.8865370154380798,
+ "learning_rate": 4.982805379463605e-05,
+ "loss": 0.6554,
+ "step": 1350
+ },
+ {
+ "epoch": 0.19087719298245615,
+ "grad_norm": 0.6979865431785583,
+ "learning_rate": 4.982546144104704e-05,
+ "loss": 0.6613,
+ "step": 1360
+ },
+ {
+ "epoch": 0.19228070175438597,
+ "grad_norm": 0.8604574203491211,
+ "learning_rate": 4.982284975998541e-05,
+ "loss": 0.6902,
+ "step": 1370
+ },
+ {
+ "epoch": 0.1936842105263158,
+ "grad_norm": 0.849172055721283,
+ "learning_rate": 4.982021875348445e-05,
+ "loss": 0.81,
+ "step": 1380
+ },
+ {
+ "epoch": 0.19508771929824562,
+ "grad_norm": 0.9217461347579956,
+ "learning_rate": 4.9817568423592484e-05,
+ "loss": 0.6796,
+ "step": 1390
+ },
+ {
+ "epoch": 0.19649122807017544,
+ "grad_norm": 1.1720378398895264,
+ "learning_rate": 4.981489877237288e-05,
+ "loss": 0.6109,
+ "step": 1400
+ },
+ {
+ "epoch": 0.19789473684210526,
+ "grad_norm": 0.8361873626708984,
+ "learning_rate": 4.9812209801904064e-05,
+ "loss": 0.7521,
+ "step": 1410
+ },
+ {
+ "epoch": 0.19929824561403509,
+ "grad_norm": 0.9124870896339417,
+ "learning_rate": 4.980950151427948e-05,
+ "loss": 0.6742,
+ "step": 1420
+ },
+ {
+ "epoch": 0.2007017543859649,
+ "grad_norm": 1.0720082521438599,
+ "learning_rate": 4.980677391160763e-05,
+ "loss": 0.659,
+ "step": 1430
+ },
+ {
+ "epoch": 0.20210526315789473,
+ "grad_norm": 0.7144408822059631,
+ "learning_rate": 4.980402699601205e-05,
+ "loss": 0.6392,
+ "step": 1440
+ },
+ {
+ "epoch": 0.20350877192982456,
+ "grad_norm": 0.8546087145805359,
+ "learning_rate": 4.98012607696313e-05,
+ "loss": 0.6674,
+ "step": 1450
+ },
+ {
+ "epoch": 0.20491228070175438,
+ "grad_norm": 0.8717739582061768,
+ "learning_rate": 4.979847523461898e-05,
+ "loss": 0.6772,
+ "step": 1460
+ },
+ {
+ "epoch": 0.2063157894736842,
+ "grad_norm": 0.9035875201225281,
+ "learning_rate": 4.9795670393143735e-05,
+ "loss": 0.6598,
+ "step": 1470
+ },
+ {
+ "epoch": 0.20771929824561403,
+ "grad_norm": 1.5168395042419434,
+ "learning_rate": 4.9792846247389214e-05,
+ "loss": 0.6784,
+ "step": 1480
+ },
+ {
+ "epoch": 0.20912280701754385,
+ "grad_norm": 1.3440768718719482,
+ "learning_rate": 4.979000279955413e-05,
+ "loss": 0.673,
+ "step": 1490
+ },
+ {
+ "epoch": 0.21052631578947367,
+ "grad_norm": 0.6575384140014648,
+ "learning_rate": 4.97871400518522e-05,
+ "loss": 0.6018,
+ "step": 1500
+ },
+ {
+ "epoch": 0.2119298245614035,
+ "grad_norm": 0.843136727809906,
+ "learning_rate": 4.978425800651216e-05,
+ "loss": 0.673,
+ "step": 1510
+ },
+ {
+ "epoch": 0.21333333333333335,
+ "grad_norm": 0.9389488101005554,
+ "learning_rate": 4.978135666577779e-05,
+ "loss": 0.6455,
+ "step": 1520
+ },
+ {
+ "epoch": 0.21473684210526317,
+ "grad_norm": 1.0860190391540527,
+ "learning_rate": 4.977843603190788e-05,
+ "loss": 0.6945,
+ "step": 1530
+ },
+ {
+ "epoch": 0.216140350877193,
+ "grad_norm": 0.923224925994873,
+ "learning_rate": 4.9775496107176245e-05,
+ "loss": 0.6441,
+ "step": 1540
+ },
+ {
+ "epoch": 0.21754385964912282,
+ "grad_norm": 0.9440721273422241,
+ "learning_rate": 4.977253689387172e-05,
+ "loss": 0.6399,
+ "step": 1550
+ },
+ {
+ "epoch": 0.21894736842105264,
+ "grad_norm": 1.225602626800537,
+ "learning_rate": 4.976955839429815e-05,
+ "loss": 0.7059,
+ "step": 1560
+ },
+ {
+ "epoch": 0.22035087719298246,
+ "grad_norm": 0.7701632380485535,
+ "learning_rate": 4.976656061077441e-05,
+ "loss": 0.7422,
+ "step": 1570
+ },
+ {
+ "epoch": 0.2217543859649123,
+ "grad_norm": 1.0036752223968506,
+ "learning_rate": 4.976354354563435e-05,
+ "loss": 0.7,
+ "step": 1580
+ },
+ {
+ "epoch": 0.2231578947368421,
+ "grad_norm": 1.2595415115356445,
+ "learning_rate": 4.976050720122688e-05,
+ "loss": 0.693,
+ "step": 1590
+ },
+ {
+ "epoch": 0.22456140350877193,
+ "grad_norm": 0.9705458283424377,
+ "learning_rate": 4.97574515799159e-05,
+ "loss": 0.6477,
+ "step": 1600
+ },
+ {
+ "epoch": 0.22596491228070176,
+ "grad_norm": 0.9339498281478882,
+ "learning_rate": 4.975437668408031e-05,
+ "loss": 0.6839,
+ "step": 1610
+ },
+ {
+ "epoch": 0.22736842105263158,
+ "grad_norm": 2.5165653228759766,
+ "learning_rate": 4.9751282516114024e-05,
+ "loss": 0.5796,
+ "step": 1620
+ },
+ {
+ "epoch": 0.2287719298245614,
+ "grad_norm": 1.2094191312789917,
+ "learning_rate": 4.9748169078425955e-05,
+ "loss": 0.6967,
+ "step": 1630
+ },
+ {
+ "epoch": 0.23017543859649123,
+ "grad_norm": 0.9400249719619751,
+ "learning_rate": 4.974503637344002e-05,
+ "loss": 0.5507,
+ "step": 1640
+ },
+ {
+ "epoch": 0.23157894736842105,
+ "grad_norm": 0.7801631093025208,
+ "learning_rate": 4.9741884403595135e-05,
+ "loss": 0.6792,
+ "step": 1650
+ },
+ {
+ "epoch": 0.23298245614035087,
+ "grad_norm": 0.8041971325874329,
+ "learning_rate": 4.9738713171345225e-05,
+ "loss": 0.616,
+ "step": 1660
+ },
+ {
+ "epoch": 0.2343859649122807,
+ "grad_norm": 0.9792094826698303,
+ "learning_rate": 4.9735522679159195e-05,
+ "loss": 0.635,
+ "step": 1670
+ },
+ {
+ "epoch": 0.23578947368421052,
+ "grad_norm": 0.8937766551971436,
+ "learning_rate": 4.9732312929520964e-05,
+ "loss": 0.6902,
+ "step": 1680
+ },
+ {
+ "epoch": 0.23719298245614034,
+ "grad_norm": 0.6050293445587158,
+ "learning_rate": 4.972908392492942e-05,
+ "loss": 0.6899,
+ "step": 1690
+ },
+ {
+ "epoch": 0.23859649122807017,
+ "grad_norm": 1.114696741104126,
+ "learning_rate": 4.9725835667898455e-05,
+ "loss": 0.7013,
+ "step": 1700
+ },
+ {
+ "epoch": 0.24,
+ "grad_norm": 0.7658337354660034,
+ "learning_rate": 4.972256816095695e-05,
+ "loss": 0.6379,
+ "step": 1710
+ },
+ {
+ "epoch": 0.24140350877192981,
+ "grad_norm": 1.0719423294067383,
+ "learning_rate": 4.971928140664878e-05,
+ "loss": 0.6819,
+ "step": 1720
+ },
+ {
+ "epoch": 0.24280701754385964,
+ "grad_norm": 0.5609824061393738,
+ "learning_rate": 4.971597540753279e-05,
+ "loss": 0.6888,
+ "step": 1730
+ },
+ {
+ "epoch": 0.24421052631578946,
+ "grad_norm": 0.8473712205886841,
+ "learning_rate": 4.971265016618281e-05,
+ "loss": 0.6761,
+ "step": 1740
+ },
+ {
+ "epoch": 0.24561403508771928,
+ "grad_norm": 1.023040533065796,
+ "learning_rate": 4.970930568518765e-05,
+ "loss": 0.5544,
+ "step": 1750
+ },
+ {
+ "epoch": 0.24701754385964914,
+ "grad_norm": 1.2763292789459229,
+ "learning_rate": 4.97059419671511e-05,
+ "loss": 0.7072,
+ "step": 1760
+ },
+ {
+ "epoch": 0.24842105263157896,
+ "grad_norm": 0.9501249194145203,
+ "learning_rate": 4.9702559014691965e-05,
+ "loss": 0.5992,
+ "step": 1770
+ },
+ {
+ "epoch": 0.24982456140350878,
+ "grad_norm": 0.714192271232605,
+ "learning_rate": 4.969915683044395e-05,
+ "loss": 0.6277,
+ "step": 1780
+ },
+ {
+ "epoch": 0.2512280701754386,
+ "grad_norm": 0.8613963723182678,
+ "learning_rate": 4.9695735417055776e-05,
+ "loss": 0.5501,
+ "step": 1790
+ },
+ {
+ "epoch": 0.25263157894736843,
+ "grad_norm": 0.7384011149406433,
+ "learning_rate": 4.969229477719116e-05,
+ "loss": 0.7619,
+ "step": 1800
+ },
+ {
+ "epoch": 0.2540350877192982,
+ "grad_norm": 0.8516148328781128,
+ "learning_rate": 4.9688834913528724e-05,
+ "loss": 0.6706,
+ "step": 1810
+ },
+ {
+ "epoch": 0.2554385964912281,
+ "grad_norm": 0.9726106524467468,
+ "learning_rate": 4.9685355828762115e-05,
+ "loss": 0.6825,
+ "step": 1820
+ },
+ {
+ "epoch": 0.25684210526315787,
+ "grad_norm": 0.9834999442100525,
+ "learning_rate": 4.96818575255999e-05,
+ "loss": 0.7195,
+ "step": 1830
+ },
+ {
+ "epoch": 0.2582456140350877,
+ "grad_norm": 0.6964922547340393,
+ "learning_rate": 4.967834000676564e-05,
+ "loss": 0.6196,
+ "step": 1840
+ },
+ {
+ "epoch": 0.2596491228070175,
+ "grad_norm": 1.0819238424301147,
+ "learning_rate": 4.967480327499785e-05,
+ "loss": 0.5768,
+ "step": 1850
+ },
+ {
+ "epoch": 0.26105263157894737,
+ "grad_norm": 0.7200153470039368,
+ "learning_rate": 4.9671247333049975e-05,
+ "loss": 0.6484,
+ "step": 1860
+ },
+ {
+ "epoch": 0.2624561403508772,
+ "grad_norm": 0.6098335385322571,
+ "learning_rate": 4.966767218369046e-05,
+ "loss": 0.6132,
+ "step": 1870
+ },
+ {
+ "epoch": 0.263859649122807,
+ "grad_norm": 1.1508702039718628,
+ "learning_rate": 4.966407782970267e-05,
+ "loss": 0.6435,
+ "step": 1880
+ },
+ {
+ "epoch": 0.26526315789473687,
+ "grad_norm": 0.9164888858795166,
+ "learning_rate": 4.966046427388494e-05,
+ "loss": 0.6581,
+ "step": 1890
+ },
+ {
+ "epoch": 0.26666666666666666,
+ "grad_norm": 0.7689521908760071,
+ "learning_rate": 4.965683151905054e-05,
+ "loss": 0.593,
+ "step": 1900
+ },
+ {
+ "epoch": 0.2680701754385965,
+ "grad_norm": 1.2253938913345337,
+ "learning_rate": 4.965317956802769e-05,
+ "loss": 0.7037,
+ "step": 1910
+ },
+ {
+ "epoch": 0.2694736842105263,
+ "grad_norm": 1.2759559154510498,
+ "learning_rate": 4.964950842365957e-05,
+ "loss": 0.7054,
+ "step": 1920
+ },
+ {
+ "epoch": 0.27087719298245616,
+ "grad_norm": 1.0961602926254272,
+ "learning_rate": 4.9645818088804284e-05,
+ "loss": 0.6463,
+ "step": 1930
+ },
+ {
+ "epoch": 0.27228070175438596,
+ "grad_norm": 1.0374549627304077,
+ "learning_rate": 4.964210856633489e-05,
+ "loss": 0.7222,
+ "step": 1940
+ },
+ {
+ "epoch": 0.2736842105263158,
+ "grad_norm": 1.228814721107483,
+ "learning_rate": 4.963837985913938e-05,
+ "loss": 0.7659,
+ "step": 1950
+ },
+ {
+ "epoch": 0.2750877192982456,
+ "grad_norm": 1.131882667541504,
+ "learning_rate": 4.963463197012067e-05,
+ "loss": 0.7388,
+ "step": 1960
+ },
+ {
+ "epoch": 0.27649122807017545,
+ "grad_norm": 0.6964682340621948,
+ "learning_rate": 4.9630864902196626e-05,
+ "loss": 0.6961,
+ "step": 1970
+ },
+ {
+ "epoch": 0.27789473684210525,
+ "grad_norm": 0.6383505463600159,
+ "learning_rate": 4.962707865830004e-05,
+ "loss": 0.5755,
+ "step": 1980
+ },
+ {
+ "epoch": 0.2792982456140351,
+ "grad_norm": 0.9402531981468201,
+ "learning_rate": 4.9623273241378636e-05,
+ "loss": 0.6845,
+ "step": 1990
+ },
+ {
+ "epoch": 0.2807017543859649,
+ "grad_norm": 0.8488597273826599,
+ "learning_rate": 4.9619448654395055e-05,
+ "loss": 0.7236,
+ "step": 2000
+ },
+ {
+ "epoch": 0.2807017543859649,
+ "eval_loss": 0.6709622740745544,
+ "eval_runtime": 44.146,
+ "eval_samples_per_second": 33.978,
+ "eval_steps_per_second": 8.495,
+ "step": 2000
+ },
+ {
+ "epoch": 0.28210526315789475,
+ "grad_norm": 1.1073328256607056,
+ "learning_rate": 4.9615604900326875e-05,
+ "loss": 0.5944,
+ "step": 2010
+ },
+ {
+ "epoch": 0.28350877192982454,
+ "grad_norm": 1.3910387754440308,
+ "learning_rate": 4.961174198216658e-05,
+ "loss": 0.6174,
+ "step": 2020
+ },
+ {
+ "epoch": 0.2849122807017544,
+ "grad_norm": 0.698826253414154,
+ "learning_rate": 4.9607859902921595e-05,
+ "loss": 0.6801,
+ "step": 2030
+ },
+ {
+ "epoch": 0.2863157894736842,
+ "grad_norm": 1.118665099143982,
+ "learning_rate": 4.960395866561425e-05,
+ "loss": 0.6657,
+ "step": 2040
+ },
+ {
+ "epoch": 0.28771929824561404,
+ "grad_norm": 1.1043261289596558,
+ "learning_rate": 4.960003827328179e-05,
+ "loss": 0.6536,
+ "step": 2050
+ },
+ {
+ "epoch": 0.28912280701754384,
+ "grad_norm": 0.7518707513809204,
+ "learning_rate": 4.959609872897637e-05,
+ "loss": 0.6361,
+ "step": 2060
+ },
+ {
+ "epoch": 0.2905263157894737,
+ "grad_norm": 1.0390689373016357,
+ "learning_rate": 4.959214003576507e-05,
+ "loss": 0.6369,
+ "step": 2070
+ },
+ {
+ "epoch": 0.2919298245614035,
+ "grad_norm": 1.122710108757019,
+ "learning_rate": 4.958816219672986e-05,
+ "loss": 0.7563,
+ "step": 2080
+ },
+ {
+ "epoch": 0.29333333333333333,
+ "grad_norm": 0.9062842726707458,
+ "learning_rate": 4.9584165214967634e-05,
+ "loss": 0.6575,
+ "step": 2090
+ },
+ {
+ "epoch": 0.29473684210526313,
+ "grad_norm": 1.4019687175750732,
+ "learning_rate": 4.9580149093590165e-05,
+ "loss": 0.6611,
+ "step": 2100
+ },
+ {
+ "epoch": 0.296140350877193,
+ "grad_norm": 0.9323289394378662,
+ "learning_rate": 4.957611383572415e-05,
+ "loss": 0.6456,
+ "step": 2110
+ },
+ {
+ "epoch": 0.29754385964912283,
+ "grad_norm": 1.0447218418121338,
+ "learning_rate": 4.9572059444511175e-05,
+ "loss": 0.7114,
+ "step": 2120
+ },
+ {
+ "epoch": 0.29894736842105263,
+ "grad_norm": 1.3190436363220215,
+ "learning_rate": 4.956798592310773e-05,
+ "loss": 0.6338,
+ "step": 2130
+ },
+ {
+ "epoch": 0.3003508771929825,
+ "grad_norm": 0.7944990396499634,
+ "learning_rate": 4.956389327468518e-05,
+ "loss": 0.6323,
+ "step": 2140
+ },
+ {
+ "epoch": 0.3017543859649123,
+ "grad_norm": 0.9921332001686096,
+ "learning_rate": 4.9559781502429784e-05,
+ "loss": 0.6231,
+ "step": 2150
+ },
+ {
+ "epoch": 0.3031578947368421,
+ "grad_norm": 1.0437482595443726,
+ "learning_rate": 4.955565060954272e-05,
+ "loss": 0.6515,
+ "step": 2160
+ },
+ {
+ "epoch": 0.3045614035087719,
+ "grad_norm": 0.8929722309112549,
+ "learning_rate": 4.9551500599240006e-05,
+ "loss": 0.6023,
+ "step": 2170
+ },
+ {
+ "epoch": 0.3059649122807018,
+ "grad_norm": 1.1816951036453247,
+ "learning_rate": 4.954733147475259e-05,
+ "loss": 0.678,
+ "step": 2180
+ },
+ {
+ "epoch": 0.30736842105263157,
+ "grad_norm": 1.4489054679870605,
+ "learning_rate": 4.954314323932627e-05,
+ "loss": 0.6307,
+ "step": 2190
+ },
+ {
+ "epoch": 0.3087719298245614,
+ "grad_norm": 0.6073512434959412,
+ "learning_rate": 4.953893589622172e-05,
+ "loss": 0.6543,
+ "step": 2200
+ },
+ {
+ "epoch": 0.3101754385964912,
+ "grad_norm": 0.8957934975624084,
+ "learning_rate": 4.9534709448714514e-05,
+ "loss": 0.7493,
+ "step": 2210
+ },
+ {
+ "epoch": 0.31157894736842107,
+ "grad_norm": 1.1038836240768433,
+ "learning_rate": 4.9530463900095084e-05,
+ "loss": 0.6856,
+ "step": 2220
+ },
+ {
+ "epoch": 0.31298245614035086,
+ "grad_norm": 1.2374224662780762,
+ "learning_rate": 4.952619925366873e-05,
+ "loss": 0.5721,
+ "step": 2230
+ },
+ {
+ "epoch": 0.3143859649122807,
+ "grad_norm": 0.9683862924575806,
+ "learning_rate": 4.9521915512755635e-05,
+ "loss": 0.7126,
+ "step": 2240
+ },
+ {
+ "epoch": 0.3157894736842105,
+ "grad_norm": 1.096661925315857,
+ "learning_rate": 4.951761268069082e-05,
+ "loss": 0.7522,
+ "step": 2250
+ },
+ {
+ "epoch": 0.31719298245614036,
+ "grad_norm": 0.9801945686340332,
+ "learning_rate": 4.95132907608242e-05,
+ "loss": 0.6825,
+ "step": 2260
+ },
+ {
+ "epoch": 0.31859649122807016,
+ "grad_norm": 0.8269819021224976,
+ "learning_rate": 4.950894975652055e-05,
+ "loss": 0.7592,
+ "step": 2270
+ },
+ {
+ "epoch": 0.32,
+ "grad_norm": 0.7468457818031311,
+ "learning_rate": 4.950458967115946e-05,
+ "loss": 0.5719,
+ "step": 2280
+ },
+ {
+ "epoch": 0.3214035087719298,
+ "grad_norm": 0.9176953434944153,
+ "learning_rate": 4.9500210508135436e-05,
+ "loss": 0.6288,
+ "step": 2290
+ },
+ {
+ "epoch": 0.32280701754385965,
+ "grad_norm": 0.6870772838592529,
+ "learning_rate": 4.9495812270857786e-05,
+ "loss": 0.7081,
+ "step": 2300
+ },
+ {
+ "epoch": 0.32421052631578945,
+ "grad_norm": 0.8877288103103638,
+ "learning_rate": 4.94913949627507e-05,
+ "loss": 0.6371,
+ "step": 2310
+ },
+ {
+ "epoch": 0.3256140350877193,
+ "grad_norm": 0.9289653897285461,
+ "learning_rate": 4.9486958587253195e-05,
+ "loss": 0.6712,
+ "step": 2320
+ },
+ {
+ "epoch": 0.3270175438596491,
+ "grad_norm": 0.7378761172294617,
+ "learning_rate": 4.9482503147819156e-05,
+ "loss": 0.6232,
+ "step": 2330
+ },
+ {
+ "epoch": 0.32842105263157895,
+ "grad_norm": 0.7357892394065857,
+ "learning_rate": 4.947802864791727e-05,
+ "loss": 0.6519,
+ "step": 2340
+ },
+ {
+ "epoch": 0.3298245614035088,
+ "grad_norm": 1.509859323501587,
+ "learning_rate": 4.947353509103112e-05,
+ "loss": 0.7172,
+ "step": 2350
+ },
+ {
+ "epoch": 0.3312280701754386,
+ "grad_norm": 0.9467512369155884,
+ "learning_rate": 4.946902248065907e-05,
+ "loss": 0.6784,
+ "step": 2360
+ },
+ {
+ "epoch": 0.33263157894736844,
+ "grad_norm": 1.1108275651931763,
+ "learning_rate": 4.946449082031435e-05,
+ "loss": 0.612,
+ "step": 2370
+ },
+ {
+ "epoch": 0.33403508771929824,
+ "grad_norm": 1.0811039209365845,
+ "learning_rate": 4.9459940113525014e-05,
+ "loss": 0.7573,
+ "step": 2380
+ },
+ {
+ "epoch": 0.3354385964912281,
+ "grad_norm": 0.8881508708000183,
+ "learning_rate": 4.945537036383394e-05,
+ "loss": 0.7167,
+ "step": 2390
+ },
+ {
+ "epoch": 0.3368421052631579,
+ "grad_norm": 1.144106149673462,
+ "learning_rate": 4.945078157479884e-05,
+ "loss": 0.5797,
+ "step": 2400
+ },
+ {
+ "epoch": 0.33824561403508774,
+ "grad_norm": 1.1335030794143677,
+ "learning_rate": 4.944617374999224e-05,
+ "loss": 0.603,
+ "step": 2410
+ },
+ {
+ "epoch": 0.33964912280701753,
+ "grad_norm": 0.9101009368896484,
+ "learning_rate": 4.944154689300148e-05,
+ "loss": 0.6496,
+ "step": 2420
+ },
+ {
+ "epoch": 0.3410526315789474,
+ "grad_norm": 0.9584961533546448,
+ "learning_rate": 4.943690100742875e-05,
+ "loss": 0.6945,
+ "step": 2430
+ },
+ {
+ "epoch": 0.3424561403508772,
+ "grad_norm": 0.8912618160247803,
+ "learning_rate": 4.943223609689101e-05,
+ "loss": 0.6489,
+ "step": 2440
+ },
+ {
+ "epoch": 0.34385964912280703,
+ "grad_norm": 0.7363690733909607,
+ "learning_rate": 4.9427552165020066e-05,
+ "loss": 0.6066,
+ "step": 2450
+ },
+ {
+ "epoch": 0.3452631578947368,
+ "grad_norm": 1.2380393743515015,
+ "learning_rate": 4.9422849215462506e-05,
+ "loss": 0.6208,
+ "step": 2460
+ },
+ {
+ "epoch": 0.3466666666666667,
+ "grad_norm": 1.4995614290237427,
+ "learning_rate": 4.9418127251879756e-05,
+ "loss": 0.7249,
+ "step": 2470
+ },
+ {
+ "epoch": 0.3480701754385965,
+ "grad_norm": 1.0258910655975342,
+ "learning_rate": 4.9413386277948006e-05,
+ "loss": 0.7049,
+ "step": 2480
+ },
+ {
+ "epoch": 0.3494736842105263,
+ "grad_norm": 0.9672191143035889,
+ "learning_rate": 4.9408626297358286e-05,
+ "loss": 0.7138,
+ "step": 2490
+ },
+ {
+ "epoch": 0.3508771929824561,
+ "grad_norm": 0.9736180901527405,
+ "learning_rate": 4.940384731381639e-05,
+ "loss": 0.6047,
+ "step": 2500
+ },
+ {
+ "epoch": 0.35228070175438597,
+ "grad_norm": 0.7992679476737976,
+ "learning_rate": 4.9399049331042925e-05,
+ "loss": 0.6098,
+ "step": 2510
+ },
+ {
+ "epoch": 0.35368421052631577,
+ "grad_norm": 0.6984518766403198,
+ "learning_rate": 4.939423235277328e-05,
+ "loss": 0.6862,
+ "step": 2520
+ },
+ {
+ "epoch": 0.3550877192982456,
+ "grad_norm": 0.9038867354393005,
+ "learning_rate": 4.938939638275765e-05,
+ "loss": 0.7044,
+ "step": 2530
+ },
+ {
+ "epoch": 0.3564912280701754,
+ "grad_norm": 0.9274188280105591,
+ "learning_rate": 4.938454142476099e-05,
+ "loss": 0.6377,
+ "step": 2540
+ },
+ {
+ "epoch": 0.35789473684210527,
+ "grad_norm": 1.2159563302993774,
+ "learning_rate": 4.9379667482563066e-05,
+ "loss": 0.7172,
+ "step": 2550
+ },
+ {
+ "epoch": 0.35929824561403506,
+ "grad_norm": 0.8040406703948975,
+ "learning_rate": 4.937477455995839e-05,
+ "loss": 0.5563,
+ "step": 2560
+ },
+ {
+ "epoch": 0.3607017543859649,
+ "grad_norm": 0.9026057720184326,
+ "learning_rate": 4.9369862660756286e-05,
+ "loss": 0.7217,
+ "step": 2570
+ },
+ {
+ "epoch": 0.36210526315789476,
+ "grad_norm": 0.9877568483352661,
+ "learning_rate": 4.9364931788780835e-05,
+ "loss": 0.6424,
+ "step": 2580
+ },
+ {
+ "epoch": 0.36350877192982456,
+ "grad_norm": 0.8766788244247437,
+ "learning_rate": 4.9359981947870874e-05,
+ "loss": 0.6449,
+ "step": 2590
+ },
+ {
+ "epoch": 0.3649122807017544,
+ "grad_norm": 1.3229867219924927,
+ "learning_rate": 4.9355013141880045e-05,
+ "loss": 0.7963,
+ "step": 2600
+ },
+ {
+ "epoch": 0.3663157894736842,
+ "grad_norm": 1.6900445222854614,
+ "learning_rate": 4.9350025374676725e-05,
+ "loss": 0.716,
+ "step": 2610
+ },
+ {
+ "epoch": 0.36771929824561406,
+ "grad_norm": 1.055550217628479,
+ "learning_rate": 4.934501865014405e-05,
+ "loss": 0.5228,
+ "step": 2620
+ },
+ {
+ "epoch": 0.36912280701754385,
+ "grad_norm": 0.8242397904396057,
+ "learning_rate": 4.933999297217994e-05,
+ "loss": 0.6206,
+ "step": 2630
+ },
+ {
+ "epoch": 0.3705263157894737,
+ "grad_norm": 0.9964637756347656,
+ "learning_rate": 4.933494834469706e-05,
+ "loss": 0.6324,
+ "step": 2640
+ },
+ {
+ "epoch": 0.3719298245614035,
+ "grad_norm": 1.4224967956542969,
+ "learning_rate": 4.9329884771622817e-05,
+ "loss": 0.7658,
+ "step": 2650
+ },
+ {
+ "epoch": 0.37333333333333335,
+ "grad_norm": 1.1131879091262817,
+ "learning_rate": 4.9324802256899385e-05,
+ "loss": 0.6711,
+ "step": 2660
+ },
+ {
+ "epoch": 0.37473684210526315,
+ "grad_norm": 0.7792202830314636,
+ "learning_rate": 4.931970080448366e-05,
+ "loss": 0.5751,
+ "step": 2670
+ },
+ {
+ "epoch": 0.376140350877193,
+ "grad_norm": 0.7242644429206848,
+ "learning_rate": 4.931458041834731e-05,
+ "loss": 0.6772,
+ "step": 2680
+ },
+ {
+ "epoch": 0.3775438596491228,
+ "grad_norm": 0.8322226405143738,
+ "learning_rate": 4.9309441102476734e-05,
+ "loss": 0.6141,
+ "step": 2690
+ },
+ {
+ "epoch": 0.37894736842105264,
+ "grad_norm": 1.1265790462493896,
+ "learning_rate": 4.930428286087306e-05,
+ "loss": 0.627,
+ "step": 2700
+ },
+ {
+ "epoch": 0.38035087719298244,
+ "grad_norm": 1.1249980926513672,
+ "learning_rate": 4.929910569755215e-05,
+ "loss": 0.6991,
+ "step": 2710
+ },
+ {
+ "epoch": 0.3817543859649123,
+ "grad_norm": 1.5213415622711182,
+ "learning_rate": 4.929390961654462e-05,
+ "loss": 0.6379,
+ "step": 2720
+ },
+ {
+ "epoch": 0.3831578947368421,
+ "grad_norm": 0.9948049783706665,
+ "learning_rate": 4.9288694621895776e-05,
+ "loss": 0.673,
+ "step": 2730
+ },
+ {
+ "epoch": 0.38456140350877194,
+ "grad_norm": 1.249971866607666,
+ "learning_rate": 4.928346071766569e-05,
+ "loss": 0.6562,
+ "step": 2740
+ },
+ {
+ "epoch": 0.38596491228070173,
+ "grad_norm": 1.5983259677886963,
+ "learning_rate": 4.927820790792912e-05,
+ "loss": 0.6517,
+ "step": 2750
+ },
+ {
+ "epoch": 0.3873684210526316,
+ "grad_norm": 1.1207720041275024,
+ "learning_rate": 4.9272936196775565e-05,
+ "loss": 0.6506,
+ "step": 2760
+ },
+ {
+ "epoch": 0.3887719298245614,
+ "grad_norm": 1.2459056377410889,
+ "learning_rate": 4.926764558830923e-05,
+ "loss": 0.7087,
+ "step": 2770
+ },
+ {
+ "epoch": 0.39017543859649123,
+ "grad_norm": 2.858981132507324,
+ "learning_rate": 4.926233608664904e-05,
+ "loss": 0.6409,
+ "step": 2780
+ },
+ {
+ "epoch": 0.391578947368421,
+ "grad_norm": 1.2133064270019531,
+ "learning_rate": 4.9257007695928624e-05,
+ "loss": 0.7131,
+ "step": 2790
+ },
+ {
+ "epoch": 0.3929824561403509,
+ "grad_norm": 1.264398455619812,
+ "learning_rate": 4.925166042029631e-05,
+ "loss": 0.7967,
+ "step": 2800
+ },
+ {
+ "epoch": 0.39438596491228073,
+ "grad_norm": 0.8172046542167664,
+ "learning_rate": 4.924629426391515e-05,
+ "loss": 0.6273,
+ "step": 2810
+ },
+ {
+ "epoch": 0.3957894736842105,
+ "grad_norm": 0.5741508603096008,
+ "learning_rate": 4.924090923096286e-05,
+ "loss": 0.6419,
+ "step": 2820
+ },
+ {
+ "epoch": 0.3971929824561404,
+ "grad_norm": 0.8728544116020203,
+ "learning_rate": 4.923550532563189e-05,
+ "loss": 0.6296,
+ "step": 2830
+ },
+ {
+ "epoch": 0.39859649122807017,
+ "grad_norm": 0.6913738250732422,
+ "learning_rate": 4.923008255212935e-05,
+ "loss": 0.6323,
+ "step": 2840
+ },
+ {
+ "epoch": 0.4,
+ "grad_norm": 1.0395629405975342,
+ "learning_rate": 4.922464091467707e-05,
+ "loss": 0.6613,
+ "step": 2850
+ },
+ {
+ "epoch": 0.4014035087719298,
+ "grad_norm": 0.6149466633796692,
+ "learning_rate": 4.921918041751155e-05,
+ "loss": 0.6119,
+ "step": 2860
+ },
+ {
+ "epoch": 0.40280701754385967,
+ "grad_norm": 0.8594980239868164,
+ "learning_rate": 4.9213701064883966e-05,
+ "loss": 0.6575,
+ "step": 2870
+ },
+ {
+ "epoch": 0.40421052631578946,
+ "grad_norm": 1.0025339126586914,
+ "learning_rate": 4.9208202861060185e-05,
+ "loss": 0.7369,
+ "step": 2880
+ },
+ {
+ "epoch": 0.4056140350877193,
+ "grad_norm": 1.1241748332977295,
+ "learning_rate": 4.920268581032074e-05,
+ "loss": 0.6551,
+ "step": 2890
+ },
+ {
+ "epoch": 0.4070175438596491,
+ "grad_norm": 0.7128563523292542,
+ "learning_rate": 4.919714991696086e-05,
+ "loss": 0.6584,
+ "step": 2900
+ },
+ {
+ "epoch": 0.40842105263157896,
+ "grad_norm": 0.5740714073181152,
+ "learning_rate": 4.9191595185290414e-05,
+ "loss": 0.6674,
+ "step": 2910
+ },
+ {
+ "epoch": 0.40982456140350876,
+ "grad_norm": 0.9508911371231079,
+ "learning_rate": 4.918602161963396e-05,
+ "loss": 0.7091,
+ "step": 2920
+ },
+ {
+ "epoch": 0.4112280701754386,
+ "grad_norm": 1.1646149158477783,
+ "learning_rate": 4.9180429224330706e-05,
+ "loss": 0.5862,
+ "step": 2930
+ },
+ {
+ "epoch": 0.4126315789473684,
+ "grad_norm": 1.2261298894882202,
+ "learning_rate": 4.917481800373451e-05,
+ "loss": 0.731,
+ "step": 2940
+ },
+ {
+ "epoch": 0.41403508771929826,
+ "grad_norm": 0.6014220714569092,
+ "learning_rate": 4.916918796221393e-05,
+ "loss": 0.6716,
+ "step": 2950
+ },
+ {
+ "epoch": 0.41543859649122805,
+ "grad_norm": 1.0764710903167725,
+ "learning_rate": 4.9163539104152124e-05,
+ "loss": 0.6427,
+ "step": 2960
+ },
+ {
+ "epoch": 0.4168421052631579,
+ "grad_norm": 0.7629368901252747,
+ "learning_rate": 4.9157871433946925e-05,
+ "loss": 0.6184,
+ "step": 2970
+ },
+ {
+ "epoch": 0.4182456140350877,
+ "grad_norm": 0.8151566982269287,
+ "learning_rate": 4.9152184956010813e-05,
+ "loss": 0.6208,
+ "step": 2980
+ },
+ {
+ "epoch": 0.41964912280701755,
+ "grad_norm": 1.4884957075119019,
+ "learning_rate": 4.91464796747709e-05,
+ "loss": 0.6517,
+ "step": 2990
+ },
+ {
+ "epoch": 0.42105263157894735,
+ "grad_norm": 0.6417763233184814,
+ "learning_rate": 4.914075559466895e-05,
+ "loss": 0.6656,
+ "step": 3000
+ },
+ {
+ "epoch": 0.4224561403508772,
+ "grad_norm": 0.8164128065109253,
+ "learning_rate": 4.913501272016135e-05,
+ "loss": 0.6605,
+ "step": 3010
+ },
+ {
+ "epoch": 0.423859649122807,
+ "grad_norm": 0.9845851063728333,
+ "learning_rate": 4.9129251055719125e-05,
+ "loss": 0.6348,
+ "step": 3020
+ },
+ {
+ "epoch": 0.42526315789473684,
+ "grad_norm": 0.7174735069274902,
+ "learning_rate": 4.912347060582793e-05,
+ "loss": 0.6735,
+ "step": 3030
+ },
+ {
+ "epoch": 0.4266666666666667,
+ "grad_norm": 1.0722357034683228,
+ "learning_rate": 4.911767137498805e-05,
+ "loss": 0.604,
+ "step": 3040
+ },
+ {
+ "epoch": 0.4280701754385965,
+ "grad_norm": 0.8019692897796631,
+ "learning_rate": 4.911185336771437e-05,
+ "loss": 0.659,
+ "step": 3050
+ },
+ {
+ "epoch": 0.42947368421052634,
+ "grad_norm": 0.6950979232788086,
+ "learning_rate": 4.910601658853642e-05,
+ "loss": 0.6545,
+ "step": 3060
+ },
+ {
+ "epoch": 0.43087719298245614,
+ "grad_norm": 1.0000766515731812,
+ "learning_rate": 4.910016104199833e-05,
+ "loss": 0.6057,
+ "step": 3070
+ },
+ {
+ "epoch": 0.432280701754386,
+ "grad_norm": 1.0175904035568237,
+ "learning_rate": 4.909428673265884e-05,
+ "loss": 0.5503,
+ "step": 3080
+ },
+ {
+ "epoch": 0.4336842105263158,
+ "grad_norm": 1.158728003501892,
+ "learning_rate": 4.90883936650913e-05,
+ "loss": 0.6534,
+ "step": 3090
+ },
+ {
+ "epoch": 0.43508771929824563,
+ "grad_norm": 0.9984928369522095,
+ "learning_rate": 4.908248184388367e-05,
+ "loss": 0.6696,
+ "step": 3100
+ },
+ {
+ "epoch": 0.43649122807017543,
+ "grad_norm": 0.8490105867385864,
+ "learning_rate": 4.90765512736385e-05,
+ "loss": 0.5936,
+ "step": 3110
+ },
+ {
+ "epoch": 0.4378947368421053,
+ "grad_norm": 1.14065420627594,
+ "learning_rate": 4.907060195897296e-05,
+ "loss": 0.6154,
+ "step": 3120
+ },
+ {
+ "epoch": 0.4392982456140351,
+ "grad_norm": 1.0342949628829956,
+ "learning_rate": 4.906463390451878e-05,
+ "loss": 0.7975,
+ "step": 3130
+ },
+ {
+ "epoch": 0.44070175438596493,
+ "grad_norm": 1.2673470973968506,
+ "learning_rate": 4.9058647114922286e-05,
+ "loss": 0.6742,
+ "step": 3140
+ },
+ {
+ "epoch": 0.4421052631578947,
+ "grad_norm": 0.7902513146400452,
+ "learning_rate": 4.9052641594844416e-05,
+ "loss": 0.6221,
+ "step": 3150
+ },
+ {
+ "epoch": 0.4435087719298246,
+ "grad_norm": 0.813940167427063,
+ "learning_rate": 4.9046617348960666e-05,
+ "loss": 0.7789,
+ "step": 3160
+ },
+ {
+ "epoch": 0.44491228070175437,
+ "grad_norm": 0.9385407567024231,
+ "learning_rate": 4.904057438196111e-05,
+ "loss": 0.6668,
+ "step": 3170
+ },
+ {
+ "epoch": 0.4463157894736842,
+ "grad_norm": 1.005690574645996,
+ "learning_rate": 4.903451269855043e-05,
+ "loss": 0.6732,
+ "step": 3180
+ },
+ {
+ "epoch": 0.447719298245614,
+ "grad_norm": 1.1185845136642456,
+ "learning_rate": 4.9028432303447826e-05,
+ "loss": 0.652,
+ "step": 3190
+ },
+ {
+ "epoch": 0.44912280701754387,
+ "grad_norm": 1.234397292137146,
+ "learning_rate": 4.902233320138711e-05,
+ "loss": 0.7734,
+ "step": 3200
+ },
+ {
+ "epoch": 0.45052631578947366,
+ "grad_norm": 0.75343918800354,
+ "learning_rate": 4.901621539711664e-05,
+ "loss": 0.6524,
+ "step": 3210
+ },
+ {
+ "epoch": 0.4519298245614035,
+ "grad_norm": 0.7265051603317261,
+ "learning_rate": 4.901007889539933e-05,
+ "loss": 0.5631,
+ "step": 3220
+ },
+ {
+ "epoch": 0.4533333333333333,
+ "grad_norm": 0.8995214700698853,
+ "learning_rate": 4.900392370101266e-05,
+ "loss": 0.6949,
+ "step": 3230
+ },
+ {
+ "epoch": 0.45473684210526316,
+ "grad_norm": 1.1753424406051636,
+ "learning_rate": 4.899774981874867e-05,
+ "loss": 0.7445,
+ "step": 3240
+ },
+ {
+ "epoch": 0.45614035087719296,
+ "grad_norm": 0.9139629602432251,
+ "learning_rate": 4.8991557253413924e-05,
+ "loss": 0.6329,
+ "step": 3250
+ },
+ {
+ "epoch": 0.4575438596491228,
+ "grad_norm": 1.145979881286621,
+ "learning_rate": 4.8985346009829546e-05,
+ "loss": 0.6808,
+ "step": 3260
+ },
+ {
+ "epoch": 0.4589473684210526,
+ "grad_norm": 0.5931209921836853,
+ "learning_rate": 4.8979116092831223e-05,
+ "loss": 0.6464,
+ "step": 3270
+ },
+ {
+ "epoch": 0.46035087719298246,
+ "grad_norm": 0.9794625639915466,
+ "learning_rate": 4.897286750726913e-05,
+ "loss": 0.6997,
+ "step": 3280
+ },
+ {
+ "epoch": 0.4617543859649123,
+ "grad_norm": 1.121286153793335,
+ "learning_rate": 4.8966600258008024e-05,
+ "loss": 0.642,
+ "step": 3290
+ },
+ {
+ "epoch": 0.4631578947368421,
+ "grad_norm": 0.8356245160102844,
+ "learning_rate": 4.896031434992717e-05,
+ "loss": 0.651,
+ "step": 3300
+ },
+ {
+ "epoch": 0.46456140350877195,
+ "grad_norm": 0.8175771832466125,
+ "learning_rate": 4.8954009787920365e-05,
+ "loss": 0.6899,
+ "step": 3310
+ },
+ {
+ "epoch": 0.46596491228070175,
+ "grad_norm": 1.083617925643921,
+ "learning_rate": 4.894768657689592e-05,
+ "loss": 0.7559,
+ "step": 3320
+ },
+ {
+ "epoch": 0.4673684210526316,
+ "grad_norm": 1.0625582933425903,
+ "learning_rate": 4.8941344721776675e-05,
+ "loss": 0.6473,
+ "step": 3330
+ },
+ {
+ "epoch": 0.4687719298245614,
+ "grad_norm": 0.9211772680282593,
+ "learning_rate": 4.893498422749997e-05,
+ "loss": 0.726,
+ "step": 3340
+ },
+ {
+ "epoch": 0.47017543859649125,
+ "grad_norm": 0.8347317576408386,
+ "learning_rate": 4.8928605099017696e-05,
+ "loss": 0.6,
+ "step": 3350
+ },
+ {
+ "epoch": 0.47157894736842104,
+ "grad_norm": 1.115190029144287,
+ "learning_rate": 4.89222073412962e-05,
+ "loss": 0.6202,
+ "step": 3360
+ },
+ {
+ "epoch": 0.4729824561403509,
+ "grad_norm": 1.101366400718689,
+ "learning_rate": 4.8915790959316356e-05,
+ "loss": 0.6717,
+ "step": 3370
+ },
+ {
+ "epoch": 0.4743859649122807,
+ "grad_norm": 0.7661691308021545,
+ "learning_rate": 4.890935595807355e-05,
+ "loss": 0.6328,
+ "step": 3380
+ },
+ {
+ "epoch": 0.47578947368421054,
+ "grad_norm": 0.8245850205421448,
+ "learning_rate": 4.890290234257764e-05,
+ "loss": 0.7271,
+ "step": 3390
+ },
+ {
+ "epoch": 0.47719298245614034,
+ "grad_norm": 1.0110929012298584,
+ "learning_rate": 4.889643011785299e-05,
+ "loss": 0.582,
+ "step": 3400
+ },
+ {
+ "epoch": 0.4785964912280702,
+ "grad_norm": 0.7848758697509766,
+ "learning_rate": 4.888993928893846e-05,
+ "loss": 0.6851,
+ "step": 3410
+ },
+ {
+ "epoch": 0.48,
+ "grad_norm": 0.7310847640037537,
+ "learning_rate": 4.888342986088736e-05,
+ "loss": 0.583,
+ "step": 3420
+ },
+ {
+ "epoch": 0.48140350877192983,
+ "grad_norm": 1.3532679080963135,
+ "learning_rate": 4.887690183876752e-05,
+ "loss": 0.6261,
+ "step": 3430
+ },
+ {
+ "epoch": 0.48280701754385963,
+ "grad_norm": 1.0199493169784546,
+ "learning_rate": 4.887035522766122e-05,
+ "loss": 0.6563,
+ "step": 3440
+ },
+ {
+ "epoch": 0.4842105263157895,
+ "grad_norm": 0.7497562766075134,
+ "learning_rate": 4.886379003266523e-05,
+ "loss": 0.5178,
+ "step": 3450
+ },
+ {
+ "epoch": 0.4856140350877193,
+ "grad_norm": 0.8139173984527588,
+ "learning_rate": 4.885720625889078e-05,
+ "loss": 0.824,
+ "step": 3460
+ },
+ {
+ "epoch": 0.4870175438596491,
+ "grad_norm": 0.6662510633468628,
+ "learning_rate": 4.8850603911463556e-05,
+ "loss": 0.6821,
+ "step": 3470
+ },
+ {
+ "epoch": 0.4884210526315789,
+ "grad_norm": 0.9491138458251953,
+ "learning_rate": 4.8843982995523704e-05,
+ "loss": 0.6955,
+ "step": 3480
+ },
+ {
+ "epoch": 0.4898245614035088,
+ "grad_norm": 0.7988129258155823,
+ "learning_rate": 4.883734351622586e-05,
+ "loss": 0.6447,
+ "step": 3490
+ },
+ {
+ "epoch": 0.49122807017543857,
+ "grad_norm": 1.0620664358139038,
+ "learning_rate": 4.8830685478739057e-05,
+ "loss": 0.6454,
+ "step": 3500
+ },
+ {
+ "epoch": 0.4926315789473684,
+ "grad_norm": 0.7549204230308533,
+ "learning_rate": 4.8824008888246834e-05,
+ "loss": 0.6676,
+ "step": 3510
+ },
+ {
+ "epoch": 0.49403508771929827,
+ "grad_norm": 0.9870264530181885,
+ "learning_rate": 4.8817313749947115e-05,
+ "loss": 0.5715,
+ "step": 3520
+ },
+ {
+ "epoch": 0.49543859649122807,
+ "grad_norm": 0.7582098245620728,
+ "learning_rate": 4.881060006905232e-05,
+ "loss": 0.6479,
+ "step": 3530
+ },
+ {
+ "epoch": 0.4968421052631579,
+ "grad_norm": 0.6988912224769592,
+ "learning_rate": 4.880386785078925e-05,
+ "loss": 0.6208,
+ "step": 3540
+ },
+ {
+ "epoch": 0.4982456140350877,
+ "grad_norm": 0.7568824291229248,
+ "learning_rate": 4.87971171003992e-05,
+ "loss": 0.6503,
+ "step": 3550
+ },
+ {
+ "epoch": 0.49964912280701756,
+ "grad_norm": 1.2903584241867065,
+ "learning_rate": 4.879034782313786e-05,
+ "loss": 0.6525,
+ "step": 3560
+ },
+ {
+ "epoch": 0.5010526315789474,
+ "grad_norm": 0.7582905888557434,
+ "learning_rate": 4.878356002427532e-05,
+ "loss": 0.633,
+ "step": 3570
+ },
+ {
+ "epoch": 0.5024561403508772,
+ "grad_norm": 0.9976963400840759,
+ "learning_rate": 4.877675370909612e-05,
+ "loss": 0.6184,
+ "step": 3580
+ },
+ {
+ "epoch": 0.503859649122807,
+ "grad_norm": 0.8688436150550842,
+ "learning_rate": 4.876992888289923e-05,
+ "loss": 0.64,
+ "step": 3590
+ },
+ {
+ "epoch": 0.5052631578947369,
+ "grad_norm": 1.4120594263076782,
+ "learning_rate": 4.876308555099799e-05,
+ "loss": 0.6238,
+ "step": 3600
+ },
+ {
+ "epoch": 0.5066666666666667,
+ "grad_norm": 1.190382719039917,
+ "learning_rate": 4.875622371872017e-05,
+ "loss": 0.7433,
+ "step": 3610
+ },
+ {
+ "epoch": 0.5080701754385964,
+ "grad_norm": 0.8115689754486084,
+ "learning_rate": 4.874934339140795e-05,
+ "loss": 0.7031,
+ "step": 3620
+ },
+ {
+ "epoch": 0.5094736842105263,
+ "grad_norm": 0.8457335233688354,
+ "learning_rate": 4.8742444574417904e-05,
+ "loss": 0.5443,
+ "step": 3630
+ },
+ {
+ "epoch": 0.5108771929824562,
+ "grad_norm": 0.8754384517669678,
+ "learning_rate": 4.873552727312099e-05,
+ "loss": 0.6728,
+ "step": 3640
+ },
+ {
+ "epoch": 0.512280701754386,
+ "grad_norm": 1.2087777853012085,
+ "learning_rate": 4.872859149290256e-05,
+ "loss": 0.6321,
+ "step": 3650
+ },
+ {
+ "epoch": 0.5136842105263157,
+ "grad_norm": 1.0635002851486206,
+ "learning_rate": 4.872163723916237e-05,
+ "loss": 0.6301,
+ "step": 3660
+ },
+ {
+ "epoch": 0.5150877192982456,
+ "grad_norm": 1.1686186790466309,
+ "learning_rate": 4.871466451731453e-05,
+ "loss": 0.6991,
+ "step": 3670
+ },
+ {
+ "epoch": 0.5164912280701754,
+ "grad_norm": 1.1546950340270996,
+ "learning_rate": 4.870767333278755e-05,
+ "loss": 0.5503,
+ "step": 3680
+ },
+ {
+ "epoch": 0.5178947368421053,
+ "grad_norm": 0.8768120408058167,
+ "learning_rate": 4.87006636910243e-05,
+ "loss": 0.6342,
+ "step": 3690
+ },
+ {
+ "epoch": 0.519298245614035,
+ "grad_norm": 0.8353332281112671,
+ "learning_rate": 4.8693635597482045e-05,
+ "loss": 0.5933,
+ "step": 3700
+ },
+ {
+ "epoch": 0.5207017543859649,
+ "grad_norm": 0.8518616557121277,
+ "learning_rate": 4.868658905763238e-05,
+ "loss": 0.5878,
+ "step": 3710
+ },
+ {
+ "epoch": 0.5221052631578947,
+ "grad_norm": 0.8607089519500732,
+ "learning_rate": 4.8679524076961284e-05,
+ "loss": 0.5478,
+ "step": 3720
+ },
+ {
+ "epoch": 0.5235087719298246,
+ "grad_norm": 1.3177140951156616,
+ "learning_rate": 4.867244066096909e-05,
+ "loss": 0.6024,
+ "step": 3730
+ },
+ {
+ "epoch": 0.5249122807017544,
+ "grad_norm": 1.1247279644012451,
+ "learning_rate": 4.866533881517046e-05,
+ "loss": 0.6106,
+ "step": 3740
+ },
+ {
+ "epoch": 0.5263157894736842,
+ "grad_norm": 1.0166698694229126,
+ "learning_rate": 4.865821854509445e-05,
+ "loss": 0.602,
+ "step": 3750
+ },
+ {
+ "epoch": 0.527719298245614,
+ "grad_norm": 0.7537686824798584,
+ "learning_rate": 4.865107985628442e-05,
+ "loss": 0.7147,
+ "step": 3760
+ },
+ {
+ "epoch": 0.5291228070175439,
+ "grad_norm": 1.1428786516189575,
+ "learning_rate": 4.86439227542981e-05,
+ "loss": 0.6561,
+ "step": 3770
+ },
+ {
+ "epoch": 0.5305263157894737,
+ "grad_norm": 1.1645269393920898,
+ "learning_rate": 4.863674724470751e-05,
+ "loss": 0.7062,
+ "step": 3780
+ },
+ {
+ "epoch": 0.5319298245614035,
+ "grad_norm": 1.128609299659729,
+ "learning_rate": 4.862955333309905e-05,
+ "loss": 0.7019,
+ "step": 3790
+ },
+ {
+ "epoch": 0.5333333333333333,
+ "grad_norm": 1.0182465314865112,
+ "learning_rate": 4.8622341025073425e-05,
+ "loss": 0.5702,
+ "step": 3800
+ },
+ {
+ "epoch": 0.5347368421052632,
+ "grad_norm": 0.9816009998321533,
+ "learning_rate": 4.861511032624567e-05,
+ "loss": 0.6956,
+ "step": 3810
+ },
+ {
+ "epoch": 0.536140350877193,
+ "grad_norm": 0.7931702733039856,
+ "learning_rate": 4.860786124224512e-05,
+ "loss": 0.6266,
+ "step": 3820
+ },
+ {
+ "epoch": 0.5375438596491228,
+ "grad_norm": 1.3353627920150757,
+ "learning_rate": 4.860059377871544e-05,
+ "loss": 0.6758,
+ "step": 3830
+ },
+ {
+ "epoch": 0.5389473684210526,
+ "grad_norm": 1.1476149559020996,
+ "learning_rate": 4.85933079413146e-05,
+ "loss": 0.6559,
+ "step": 3840
+ },
+ {
+ "epoch": 0.5403508771929825,
+ "grad_norm": 0.9160752892494202,
+ "learning_rate": 4.858600373571487e-05,
+ "loss": 0.6052,
+ "step": 3850
+ },
+ {
+ "epoch": 0.5417543859649123,
+ "grad_norm": 1.0451756715774536,
+ "learning_rate": 4.8578681167602834e-05,
+ "loss": 0.6119,
+ "step": 3860
+ },
+ {
+ "epoch": 0.5431578947368421,
+ "grad_norm": 0.9673342108726501,
+ "learning_rate": 4.8571340242679354e-05,
+ "loss": 0.5872,
+ "step": 3870
+ },
+ {
+ "epoch": 0.5445614035087719,
+ "grad_norm": 1.24473237991333,
+ "learning_rate": 4.856398096665959e-05,
+ "loss": 0.7302,
+ "step": 3880
+ },
+ {
+ "epoch": 0.5459649122807018,
+ "grad_norm": 0.967494547367096,
+ "learning_rate": 4.8556603345273e-05,
+ "loss": 0.6889,
+ "step": 3890
+ },
+ {
+ "epoch": 0.5473684210526316,
+ "grad_norm": 0.9426731467247009,
+ "learning_rate": 4.8549207384263305e-05,
+ "loss": 0.6045,
+ "step": 3900
+ },
+ {
+ "epoch": 0.5487719298245614,
+ "grad_norm": 1.033600091934204,
+ "learning_rate": 4.854179308938852e-05,
+ "loss": 0.743,
+ "step": 3910
+ },
+ {
+ "epoch": 0.5501754385964912,
+ "grad_norm": 0.9784322381019592,
+ "learning_rate": 4.8534360466420926e-05,
+ "loss": 0.6416,
+ "step": 3920
+ },
+ {
+ "epoch": 0.5515789473684211,
+ "grad_norm": 1.0500706434249878,
+ "learning_rate": 4.852690952114708e-05,
+ "loss": 0.5975,
+ "step": 3930
+ },
+ {
+ "epoch": 0.5529824561403509,
+ "grad_norm": 1.6134823560714722,
+ "learning_rate": 4.851944025936779e-05,
+ "loss": 0.7975,
+ "step": 3940
+ },
+ {
+ "epoch": 0.5543859649122806,
+ "grad_norm": 0.785410463809967,
+ "learning_rate": 4.851195268689813e-05,
+ "loss": 0.6836,
+ "step": 3950
+ },
+ {
+ "epoch": 0.5557894736842105,
+ "grad_norm": 1.15956449508667,
+ "learning_rate": 4.850444680956745e-05,
+ "loss": 0.5265,
+ "step": 3960
+ },
+ {
+ "epoch": 0.5571929824561404,
+ "grad_norm": 1.0284963846206665,
+ "learning_rate": 4.8496922633219314e-05,
+ "loss": 0.687,
+ "step": 3970
+ },
+ {
+ "epoch": 0.5585964912280702,
+ "grad_norm": 0.5753929615020752,
+ "learning_rate": 4.8489380163711556e-05,
+ "loss": 0.5644,
+ "step": 3980
+ },
+ {
+ "epoch": 0.56,
+ "grad_norm": 1.0494047403335571,
+ "learning_rate": 4.848181940691625e-05,
+ "loss": 0.6013,
+ "step": 3990
+ },
+ {
+ "epoch": 0.5614035087719298,
+ "grad_norm": 1.090614914894104,
+ "learning_rate": 4.8474240368719703e-05,
+ "loss": 0.724,
+ "step": 4000
+ },
+ {
+ "epoch": 0.5614035087719298,
+ "eval_loss": 0.6521075963973999,
+ "eval_runtime": 44.1632,
+ "eval_samples_per_second": 33.965,
+ "eval_steps_per_second": 8.491,
+ "step": 4000
+ },
+ {
+ "epoch": 0.5628070175438596,
+ "grad_norm": 1.593772530555725,
+ "learning_rate": 4.846664305502245e-05,
+ "loss": 0.6668,
+ "step": 4010
+ },
+ {
+ "epoch": 0.5642105263157895,
+ "grad_norm": 1.0096566677093506,
+ "learning_rate": 4.8459027471739284e-05,
+ "loss": 0.6898,
+ "step": 4020
+ },
+ {
+ "epoch": 0.5656140350877193,
+ "grad_norm": 1.126257061958313,
+ "learning_rate": 4.8451393624799165e-05,
+ "loss": 0.6639,
+ "step": 4030
+ },
+ {
+ "epoch": 0.5670175438596491,
+ "grad_norm": 1.0839751958847046,
+ "learning_rate": 4.844374152014532e-05,
+ "loss": 0.7336,
+ "step": 4040
+ },
+ {
+ "epoch": 0.5684210526315789,
+ "grad_norm": 0.4993619918823242,
+ "learning_rate": 4.843607116373518e-05,
+ "loss": 0.6233,
+ "step": 4050
+ },
+ {
+ "epoch": 0.5698245614035088,
+ "grad_norm": 1.6385512351989746,
+ "learning_rate": 4.8428382561540366e-05,
+ "loss": 0.6178,
+ "step": 4060
+ },
+ {
+ "epoch": 0.5712280701754386,
+ "grad_norm": 0.9295198321342468,
+ "learning_rate": 4.8420675719546723e-05,
+ "loss": 0.6121,
+ "step": 4070
+ },
+ {
+ "epoch": 0.5726315789473684,
+ "grad_norm": 1.2179811000823975,
+ "learning_rate": 4.8412950643754305e-05,
+ "loss": 0.6225,
+ "step": 4080
+ },
+ {
+ "epoch": 0.5740350877192982,
+ "grad_norm": 1.1477456092834473,
+ "learning_rate": 4.840520734017734e-05,
+ "loss": 0.6502,
+ "step": 4090
+ },
+ {
+ "epoch": 0.5754385964912281,
+ "grad_norm": 0.8792319297790527,
+ "learning_rate": 4.839744581484425e-05,
+ "loss": 0.6799,
+ "step": 4100
+ },
+ {
+ "epoch": 0.5768421052631579,
+ "grad_norm": 1.995977759361267,
+ "learning_rate": 4.8389666073797646e-05,
+ "loss": 0.7671,
+ "step": 4110
+ },
+ {
+ "epoch": 0.5782456140350877,
+ "grad_norm": 0.680174708366394,
+ "learning_rate": 4.8381868123094335e-05,
+ "loss": 0.6289,
+ "step": 4120
+ },
+ {
+ "epoch": 0.5796491228070175,
+ "grad_norm": 0.8312070369720459,
+ "learning_rate": 4.837405196880529e-05,
+ "loss": 0.6621,
+ "step": 4130
+ },
+ {
+ "epoch": 0.5810526315789474,
+ "grad_norm": 0.8448961973190308,
+ "learning_rate": 4.836621761701564e-05,
+ "loss": 0.601,
+ "step": 4140
+ },
+ {
+ "epoch": 0.5824561403508772,
+ "grad_norm": 1.1311395168304443,
+ "learning_rate": 4.835836507382471e-05,
+ "loss": 0.6818,
+ "step": 4150
+ },
+ {
+ "epoch": 0.583859649122807,
+ "grad_norm": 0.8135958313941956,
+ "learning_rate": 4.835049434534596e-05,
+ "loss": 0.6688,
+ "step": 4160
+ },
+ {
+ "epoch": 0.5852631578947368,
+ "grad_norm": 0.9292672276496887,
+ "learning_rate": 4.8342605437707034e-05,
+ "loss": 0.7652,
+ "step": 4170
+ },
+ {
+ "epoch": 0.5866666666666667,
+ "grad_norm": 1.1490682363510132,
+ "learning_rate": 4.8334698357049715e-05,
+ "loss": 0.5381,
+ "step": 4180
+ },
+ {
+ "epoch": 0.5880701754385965,
+ "grad_norm": 1.1863840818405151,
+ "learning_rate": 4.832677310952993e-05,
+ "loss": 0.6786,
+ "step": 4190
+ },
+ {
+ "epoch": 0.5894736842105263,
+ "grad_norm": 0.7175789475440979,
+ "learning_rate": 4.831882970131777e-05,
+ "loss": 0.629,
+ "step": 4200
+ },
+ {
+ "epoch": 0.5908771929824561,
+ "grad_norm": 0.898485541343689,
+ "learning_rate": 4.831086813859743e-05,
+ "loss": 0.6021,
+ "step": 4210
+ },
+ {
+ "epoch": 0.592280701754386,
+ "grad_norm": 1.0772299766540527,
+ "learning_rate": 4.830288842756728e-05,
+ "loss": 0.5706,
+ "step": 4220
+ },
+ {
+ "epoch": 0.5936842105263158,
+ "grad_norm": 0.8830444812774658,
+ "learning_rate": 4.8294890574439784e-05,
+ "loss": 0.6716,
+ "step": 4230
+ },
+ {
+ "epoch": 0.5950877192982457,
+ "grad_norm": 1.12392258644104,
+ "learning_rate": 4.828687458544155e-05,
+ "loss": 0.6315,
+ "step": 4240
+ },
+ {
+ "epoch": 0.5964912280701754,
+ "grad_norm": 1.751460075378418,
+ "learning_rate": 4.82788404668133e-05,
+ "loss": 0.5633,
+ "step": 4250
+ },
+ {
+ "epoch": 0.5978947368421053,
+ "grad_norm": 1.024601697921753,
+ "learning_rate": 4.827078822480987e-05,
+ "loss": 0.6747,
+ "step": 4260
+ },
+ {
+ "epoch": 0.5992982456140351,
+ "grad_norm": 0.8278754949569702,
+ "learning_rate": 4.826271786570021e-05,
+ "loss": 0.6555,
+ "step": 4270
+ },
+ {
+ "epoch": 0.600701754385965,
+ "grad_norm": 0.9836990237236023,
+ "learning_rate": 4.825462939576737e-05,
+ "loss": 0.5987,
+ "step": 4280
+ },
+ {
+ "epoch": 0.6021052631578947,
+ "grad_norm": 0.5657834410667419,
+ "learning_rate": 4.8246522821308495e-05,
+ "loss": 0.6753,
+ "step": 4290
+ },
+ {
+ "epoch": 0.6035087719298246,
+ "grad_norm": 1.1341723203659058,
+ "learning_rate": 4.823839814863484e-05,
+ "loss": 0.5856,
+ "step": 4300
+ },
+ {
+ "epoch": 0.6049122807017544,
+ "grad_norm": 1.311997652053833,
+ "learning_rate": 4.823025538407173e-05,
+ "loss": 0.6204,
+ "step": 4310
+ },
+ {
+ "epoch": 0.6063157894736843,
+ "grad_norm": 0.8703358173370361,
+ "learning_rate": 4.82220945339586e-05,
+ "loss": 0.5866,
+ "step": 4320
+ },
+ {
+ "epoch": 0.607719298245614,
+ "grad_norm": 0.8117982149124146,
+ "learning_rate": 4.8213915604648944e-05,
+ "loss": 0.7384,
+ "step": 4330
+ },
+ {
+ "epoch": 0.6091228070175438,
+ "grad_norm": 1.2093411684036255,
+ "learning_rate": 4.820571860251034e-05,
+ "loss": 0.7113,
+ "step": 4340
+ },
+ {
+ "epoch": 0.6105263157894737,
+ "grad_norm": 0.895978569984436,
+ "learning_rate": 4.819750353392443e-05,
+ "loss": 0.6544,
+ "step": 4350
+ },
+ {
+ "epoch": 0.6119298245614035,
+ "grad_norm": 0.8177430629730225,
+ "learning_rate": 4.818927040528693e-05,
+ "loss": 0.6317,
+ "step": 4360
+ },
+ {
+ "epoch": 0.6133333333333333,
+ "grad_norm": 0.8065016865730286,
+ "learning_rate": 4.818101922300762e-05,
+ "loss": 0.5756,
+ "step": 4370
+ },
+ {
+ "epoch": 0.6147368421052631,
+ "grad_norm": 0.9234448075294495,
+ "learning_rate": 4.8172749993510315e-05,
+ "loss": 0.687,
+ "step": 4380
+ },
+ {
+ "epoch": 0.616140350877193,
+ "grad_norm": 1.0152438879013062,
+ "learning_rate": 4.81644627232329e-05,
+ "loss": 0.6573,
+ "step": 4390
+ },
+ {
+ "epoch": 0.6175438596491228,
+ "grad_norm": 0.8767795562744141,
+ "learning_rate": 4.81561574186273e-05,
+ "loss": 0.6787,
+ "step": 4400
+ },
+ {
+ "epoch": 0.6189473684210526,
+ "grad_norm": 0.8680139183998108,
+ "learning_rate": 4.814783408615948e-05,
+ "loss": 0.5503,
+ "step": 4410
+ },
+ {
+ "epoch": 0.6203508771929824,
+ "grad_norm": 0.9502211213111877,
+ "learning_rate": 4.813949273230944e-05,
+ "loss": 0.6495,
+ "step": 4420
+ },
+ {
+ "epoch": 0.6217543859649123,
+ "grad_norm": 0.8180057406425476,
+ "learning_rate": 4.8131133363571214e-05,
+ "loss": 0.5845,
+ "step": 4430
+ },
+ {
+ "epoch": 0.6231578947368421,
+ "grad_norm": 1.3863866329193115,
+ "learning_rate": 4.8122755986452845e-05,
+ "loss": 0.6093,
+ "step": 4440
+ },
+ {
+ "epoch": 0.624561403508772,
+ "grad_norm": 0.7499920129776001,
+ "learning_rate": 4.8114360607476416e-05,
+ "loss": 0.6465,
+ "step": 4450
+ },
+ {
+ "epoch": 0.6259649122807017,
+ "grad_norm": 0.7183496952056885,
+ "learning_rate": 4.810594723317801e-05,
+ "loss": 0.6228,
+ "step": 4460
+ },
+ {
+ "epoch": 0.6273684210526316,
+ "grad_norm": 1.3374441862106323,
+ "learning_rate": 4.809751587010774e-05,
+ "loss": 0.657,
+ "step": 4470
+ },
+ {
+ "epoch": 0.6287719298245614,
+ "grad_norm": 0.8970227837562561,
+ "learning_rate": 4.80890665248297e-05,
+ "loss": 0.6068,
+ "step": 4480
+ },
+ {
+ "epoch": 0.6301754385964913,
+ "grad_norm": 1.075203537940979,
+ "learning_rate": 4.808059920392201e-05,
+ "loss": 0.7177,
+ "step": 4490
+ },
+ {
+ "epoch": 0.631578947368421,
+ "grad_norm": 1.3227583169937134,
+ "learning_rate": 4.807211391397674e-05,
+ "loss": 0.6333,
+ "step": 4500
+ },
+ {
+ "epoch": 0.6329824561403509,
+ "grad_norm": 0.8684366941452026,
+ "learning_rate": 4.806361066160001e-05,
+ "loss": 0.6396,
+ "step": 4510
+ },
+ {
+ "epoch": 0.6343859649122807,
+ "grad_norm": 0.9884424209594727,
+ "learning_rate": 4.8055089453411875e-05,
+ "loss": 0.622,
+ "step": 4520
+ },
+ {
+ "epoch": 0.6357894736842106,
+ "grad_norm": 1.2879207134246826,
+ "learning_rate": 4.80465502960464e-05,
+ "loss": 0.6551,
+ "step": 4530
+ },
+ {
+ "epoch": 0.6371929824561403,
+ "grad_norm": 0.8450446724891663,
+ "learning_rate": 4.80379931961516e-05,
+ "loss": 0.6874,
+ "step": 4540
+ },
+ {
+ "epoch": 0.6385964912280702,
+ "grad_norm": 1.0679776668548584,
+ "learning_rate": 4.8029418160389484e-05,
+ "loss": 0.5982,
+ "step": 4550
+ },
+ {
+ "epoch": 0.64,
+ "grad_norm": 0.7384183406829834,
+ "learning_rate": 4.8020825195435994e-05,
+ "loss": 0.5541,
+ "step": 4560
+ },
+ {
+ "epoch": 0.6414035087719299,
+ "grad_norm": 0.8015978336334229,
+ "learning_rate": 4.8012214307981064e-05,
+ "loss": 0.7297,
+ "step": 4570
+ },
+ {
+ "epoch": 0.6428070175438596,
+ "grad_norm": 0.7276405692100525,
+ "learning_rate": 4.800358550472855e-05,
+ "loss": 0.7694,
+ "step": 4580
+ },
+ {
+ "epoch": 0.6442105263157895,
+ "grad_norm": 0.7692060470581055,
+ "learning_rate": 4.799493879239628e-05,
+ "loss": 0.6194,
+ "step": 4590
+ },
+ {
+ "epoch": 0.6456140350877193,
+ "grad_norm": 1.2254407405853271,
+ "learning_rate": 4.7986274177716024e-05,
+ "loss": 0.6358,
+ "step": 4600
+ },
+ {
+ "epoch": 0.6470175438596492,
+ "grad_norm": 1.0495854616165161,
+ "learning_rate": 4.797759166743346e-05,
+ "loss": 0.6828,
+ "step": 4610
+ },
+ {
+ "epoch": 0.6484210526315789,
+ "grad_norm": 0.9298211932182312,
+ "learning_rate": 4.7968891268308246e-05,
+ "loss": 0.7163,
+ "step": 4620
+ },
+ {
+ "epoch": 0.6498245614035087,
+ "grad_norm": 0.9762528538703918,
+ "learning_rate": 4.796017298711391e-05,
+ "loss": 0.5935,
+ "step": 4630
+ },
+ {
+ "epoch": 0.6512280701754386,
+ "grad_norm": 1.0231860876083374,
+ "learning_rate": 4.795143683063797e-05,
+ "loss": 0.5696,
+ "step": 4640
+ },
+ {
+ "epoch": 0.6526315789473685,
+ "grad_norm": 1.1608182191848755,
+ "learning_rate": 4.7942682805681797e-05,
+ "loss": 0.5665,
+ "step": 4650
+ },
+ {
+ "epoch": 0.6540350877192982,
+ "grad_norm": 0.6527351140975952,
+ "learning_rate": 4.79339109190607e-05,
+ "loss": 0.6242,
+ "step": 4660
+ },
+ {
+ "epoch": 0.655438596491228,
+ "grad_norm": 0.8694155812263489,
+ "learning_rate": 4.792512117760391e-05,
+ "loss": 0.6259,
+ "step": 4670
+ },
+ {
+ "epoch": 0.6568421052631579,
+ "grad_norm": 0.9847631454467773,
+ "learning_rate": 4.7916313588154514e-05,
+ "loss": 0.6757,
+ "step": 4680
+ },
+ {
+ "epoch": 0.6582456140350877,
+ "grad_norm": 0.5999444127082825,
+ "learning_rate": 4.790748815756954e-05,
+ "loss": 0.6324,
+ "step": 4690
+ },
+ {
+ "epoch": 0.6596491228070176,
+ "grad_norm": 1.4817160367965698,
+ "learning_rate": 4.78986448927199e-05,
+ "loss": 0.5834,
+ "step": 4700
+ },
+ {
+ "epoch": 0.6610526315789473,
+ "grad_norm": 1.3592370748519897,
+ "learning_rate": 4.788978380049036e-05,
+ "loss": 0.6985,
+ "step": 4710
+ },
+ {
+ "epoch": 0.6624561403508772,
+ "grad_norm": 0.9479141235351562,
+ "learning_rate": 4.78809048877796e-05,
+ "loss": 0.6595,
+ "step": 4720
+ },
+ {
+ "epoch": 0.663859649122807,
+ "grad_norm": 1.3383686542510986,
+ "learning_rate": 4.787200816150014e-05,
+ "loss": 0.7508,
+ "step": 4730
+ },
+ {
+ "epoch": 0.6652631578947369,
+ "grad_norm": 1.0097548961639404,
+ "learning_rate": 4.786309362857839e-05,
+ "loss": 0.6452,
+ "step": 4740
+ },
+ {
+ "epoch": 0.6666666666666666,
+ "grad_norm": 0.9222456812858582,
+ "learning_rate": 4.785416129595463e-05,
+ "loss": 0.6171,
+ "step": 4750
+ },
+ {
+ "epoch": 0.6680701754385965,
+ "grad_norm": 0.9993833303451538,
+ "learning_rate": 4.784521117058298e-05,
+ "loss": 0.654,
+ "step": 4760
+ },
+ {
+ "epoch": 0.6694736842105263,
+ "grad_norm": 0.6470888257026672,
+ "learning_rate": 4.7836243259431425e-05,
+ "loss": 0.6674,
+ "step": 4770
+ },
+ {
+ "epoch": 0.6708771929824562,
+ "grad_norm": 0.8498440980911255,
+ "learning_rate": 4.7827257569481776e-05,
+ "loss": 0.6319,
+ "step": 4780
+ },
+ {
+ "epoch": 0.6722807017543859,
+ "grad_norm": 0.9220410585403442,
+ "learning_rate": 4.781825410772972e-05,
+ "loss": 0.5856,
+ "step": 4790
+ },
+ {
+ "epoch": 0.6736842105263158,
+ "grad_norm": 1.065016746520996,
+ "learning_rate": 4.780923288118475e-05,
+ "loss": 0.5919,
+ "step": 4800
+ },
+ {
+ "epoch": 0.6750877192982456,
+ "grad_norm": 0.7213327288627625,
+ "learning_rate": 4.78001938968702e-05,
+ "loss": 0.6192,
+ "step": 4810
+ },
+ {
+ "epoch": 0.6764912280701755,
+ "grad_norm": 0.7141574025154114,
+ "learning_rate": 4.779113716182323e-05,
+ "loss": 0.6628,
+ "step": 4820
+ },
+ {
+ "epoch": 0.6778947368421052,
+ "grad_norm": 0.7694927453994751,
+ "learning_rate": 4.778206268309482e-05,
+ "loss": 0.6451,
+ "step": 4830
+ },
+ {
+ "epoch": 0.6792982456140351,
+ "grad_norm": 1.2971090078353882,
+ "learning_rate": 4.777297046774977e-05,
+ "loss": 0.6823,
+ "step": 4840
+ },
+ {
+ "epoch": 0.6807017543859649,
+ "grad_norm": 0.7971644401550293,
+ "learning_rate": 4.7763860522866665e-05,
+ "loss": 0.6916,
+ "step": 4850
+ },
+ {
+ "epoch": 0.6821052631578948,
+ "grad_norm": 0.8853887319564819,
+ "learning_rate": 4.775473285553792e-05,
+ "loss": 0.5936,
+ "step": 4860
+ },
+ {
+ "epoch": 0.6835087719298245,
+ "grad_norm": 1.3222453594207764,
+ "learning_rate": 4.774558747286973e-05,
+ "loss": 0.7202,
+ "step": 4870
+ },
+ {
+ "epoch": 0.6849122807017544,
+ "grad_norm": 1.187171220779419,
+ "learning_rate": 4.77364243819821e-05,
+ "loss": 0.6405,
+ "step": 4880
+ },
+ {
+ "epoch": 0.6863157894736842,
+ "grad_norm": 0.8649610280990601,
+ "learning_rate": 4.7727243590008806e-05,
+ "loss": 0.6704,
+ "step": 4890
+ },
+ {
+ "epoch": 0.6877192982456141,
+ "grad_norm": 0.9361883401870728,
+ "learning_rate": 4.771804510409741e-05,
+ "loss": 0.6304,
+ "step": 4900
+ },
+ {
+ "epoch": 0.6891228070175439,
+ "grad_norm": 0.7870001196861267,
+ "learning_rate": 4.7708828931409236e-05,
+ "loss": 0.6645,
+ "step": 4910
+ },
+ {
+ "epoch": 0.6905263157894737,
+ "grad_norm": 1.0028226375579834,
+ "learning_rate": 4.769959507911941e-05,
+ "loss": 0.7018,
+ "step": 4920
+ },
+ {
+ "epoch": 0.6919298245614035,
+ "grad_norm": 0.7500180602073669,
+ "learning_rate": 4.769034355441678e-05,
+ "loss": 0.5191,
+ "step": 4930
+ },
+ {
+ "epoch": 0.6933333333333334,
+ "grad_norm": 0.9766993522644043,
+ "learning_rate": 4.7681074364503995e-05,
+ "loss": 0.6723,
+ "step": 4940
+ },
+ {
+ "epoch": 0.6947368421052632,
+ "grad_norm": 1.3899115324020386,
+ "learning_rate": 4.767178751659743e-05,
+ "loss": 0.7069,
+ "step": 4950
+ },
+ {
+ "epoch": 0.696140350877193,
+ "grad_norm": 1.3812363147735596,
+ "learning_rate": 4.7662483017927215e-05,
+ "loss": 0.6333,
+ "step": 4960
+ },
+ {
+ "epoch": 0.6975438596491228,
+ "grad_norm": 0.6967772841453552,
+ "learning_rate": 4.765316087573722e-05,
+ "loss": 0.7116,
+ "step": 4970
+ },
+ {
+ "epoch": 0.6989473684210527,
+ "grad_norm": 1.235410213470459,
+ "learning_rate": 4.7643821097285044e-05,
+ "loss": 0.5517,
+ "step": 4980
+ },
+ {
+ "epoch": 0.7003508771929825,
+ "grad_norm": 1.0389471054077148,
+ "learning_rate": 4.763446368984205e-05,
+ "loss": 0.6856,
+ "step": 4990
+ },
+ {
+ "epoch": 0.7017543859649122,
+ "grad_norm": 0.9552194476127625,
+ "learning_rate": 4.762508866069327e-05,
+ "loss": 0.6119,
+ "step": 5000
+ },
+ {
+ "epoch": 0.7031578947368421,
+ "grad_norm": 0.8866641521453857,
+ "learning_rate": 4.7615696017137504e-05,
+ "loss": 0.5645,
+ "step": 5010
+ },
+ {
+ "epoch": 0.7045614035087719,
+ "grad_norm": 1.0465891361236572,
+ "learning_rate": 4.760628576648723e-05,
+ "loss": 0.6506,
+ "step": 5020
+ },
+ {
+ "epoch": 0.7059649122807018,
+ "grad_norm": 1.104183316230774,
+ "learning_rate": 4.759685791606868e-05,
+ "loss": 0.6092,
+ "step": 5030
+ },
+ {
+ "epoch": 0.7073684210526315,
+ "grad_norm": 0.8748829364776611,
+ "learning_rate": 4.758741247322174e-05,
+ "loss": 0.7659,
+ "step": 5040
+ },
+ {
+ "epoch": 0.7087719298245614,
+ "grad_norm": 0.9573276042938232,
+ "learning_rate": 4.7577949445300004e-05,
+ "loss": 0.5774,
+ "step": 5050
+ },
+ {
+ "epoch": 0.7101754385964912,
+ "grad_norm": 0.9269713759422302,
+ "learning_rate": 4.756846883967077e-05,
+ "loss": 0.6234,
+ "step": 5060
+ },
+ {
+ "epoch": 0.7115789473684211,
+ "grad_norm": 0.6953681111335754,
+ "learning_rate": 4.755897066371502e-05,
+ "loss": 0.6456,
+ "step": 5070
+ },
+ {
+ "epoch": 0.7129824561403508,
+ "grad_norm": 0.6628289818763733,
+ "learning_rate": 4.754945492482741e-05,
+ "loss": 0.54,
+ "step": 5080
+ },
+ {
+ "epoch": 0.7143859649122807,
+ "grad_norm": 0.7972025871276855,
+ "learning_rate": 4.7539921630416264e-05,
+ "loss": 0.5695,
+ "step": 5090
+ },
+ {
+ "epoch": 0.7157894736842105,
+ "grad_norm": 1.032006859779358,
+ "learning_rate": 4.7530370787903576e-05,
+ "loss": 0.6748,
+ "step": 5100
+ },
+ {
+ "epoch": 0.7171929824561404,
+ "grad_norm": 0.7866501212120056,
+ "learning_rate": 4.7520802404725007e-05,
+ "loss": 0.595,
+ "step": 5110
+ },
+ {
+ "epoch": 0.7185964912280701,
+ "grad_norm": 1.0693832635879517,
+ "learning_rate": 4.751121648832987e-05,
+ "loss": 0.641,
+ "step": 5120
+ },
+ {
+ "epoch": 0.72,
+ "grad_norm": 1.0331542491912842,
+ "learning_rate": 4.750161304618114e-05,
+ "loss": 0.6345,
+ "step": 5130
+ },
+ {
+ "epoch": 0.7214035087719298,
+ "grad_norm": 0.6064502000808716,
+ "learning_rate": 4.749199208575541e-05,
+ "loss": 0.5997,
+ "step": 5140
+ },
+ {
+ "epoch": 0.7228070175438597,
+ "grad_norm": 1.1691397428512573,
+ "learning_rate": 4.748235361454293e-05,
+ "loss": 0.6529,
+ "step": 5150
+ },
+ {
+ "epoch": 0.7242105263157895,
+ "grad_norm": 0.7956925630569458,
+ "learning_rate": 4.7472697640047594e-05,
+ "loss": 0.5668,
+ "step": 5160
+ },
+ {
+ "epoch": 0.7256140350877193,
+ "grad_norm": 0.9746783971786499,
+ "learning_rate": 4.7463024169786895e-05,
+ "loss": 0.6433,
+ "step": 5170
+ },
+ {
+ "epoch": 0.7270175438596491,
+ "grad_norm": 1.2105709314346313,
+ "learning_rate": 4.745333321129197e-05,
+ "loss": 0.6749,
+ "step": 5180
+ },
+ {
+ "epoch": 0.728421052631579,
+ "grad_norm": 0.7860882878303528,
+ "learning_rate": 4.744362477210755e-05,
+ "loss": 0.7041,
+ "step": 5190
+ },
+ {
+ "epoch": 0.7298245614035088,
+ "grad_norm": 1.1629239320755005,
+ "learning_rate": 4.7433898859792e-05,
+ "loss": 0.5598,
+ "step": 5200
+ },
+ {
+ "epoch": 0.7312280701754386,
+ "grad_norm": 1.1319113969802856,
+ "learning_rate": 4.742415548191728e-05,
+ "loss": 0.6433,
+ "step": 5210
+ },
+ {
+ "epoch": 0.7326315789473684,
+ "grad_norm": 0.8640940189361572,
+ "learning_rate": 4.741439464606893e-05,
+ "loss": 0.6715,
+ "step": 5220
+ },
+ {
+ "epoch": 0.7340350877192983,
+ "grad_norm": 0.7730684280395508,
+ "learning_rate": 4.740461635984609e-05,
+ "loss": 0.6391,
+ "step": 5230
+ },
+ {
+ "epoch": 0.7354385964912281,
+ "grad_norm": 1.2042145729064941,
+ "learning_rate": 4.739482063086152e-05,
+ "loss": 0.5834,
+ "step": 5240
+ },
+ {
+ "epoch": 0.7368421052631579,
+ "grad_norm": 0.9434259533882141,
+ "learning_rate": 4.73850074667415e-05,
+ "loss": 0.7835,
+ "step": 5250
+ },
+ {
+ "epoch": 0.7382456140350877,
+ "grad_norm": 0.8331650495529175,
+ "learning_rate": 4.737517687512593e-05,
+ "loss": 0.6128,
+ "step": 5260
+ },
+ {
+ "epoch": 0.7396491228070176,
+ "grad_norm": 0.8063735365867615,
+ "learning_rate": 4.7365328863668256e-05,
+ "loss": 0.655,
+ "step": 5270
+ },
+ {
+ "epoch": 0.7410526315789474,
+ "grad_norm": 0.6377186179161072,
+ "learning_rate": 4.735546344003551e-05,
+ "loss": 0.6506,
+ "step": 5280
+ },
+ {
+ "epoch": 0.7424561403508771,
+ "grad_norm": 0.8817654252052307,
+ "learning_rate": 4.734558061190824e-05,
+ "loss": 0.6984,
+ "step": 5290
+ },
+ {
+ "epoch": 0.743859649122807,
+ "grad_norm": 1.2554540634155273,
+ "learning_rate": 4.733568038698057e-05,
+ "loss": 0.7401,
+ "step": 5300
+ },
+ {
+ "epoch": 0.7452631578947368,
+ "grad_norm": 1.0858135223388672,
+ "learning_rate": 4.732576277296017e-05,
+ "loss": 0.6432,
+ "step": 5310
+ },
+ {
+ "epoch": 0.7466666666666667,
+ "grad_norm": 1.1962653398513794,
+ "learning_rate": 4.731582777756825e-05,
+ "loss": 0.6687,
+ "step": 5320
+ },
+ {
+ "epoch": 0.7480701754385964,
+ "grad_norm": 1.0213031768798828,
+ "learning_rate": 4.730587540853954e-05,
+ "loss": 0.6489,
+ "step": 5330
+ },
+ {
+ "epoch": 0.7494736842105263,
+ "grad_norm": 0.8629382252693176,
+ "learning_rate": 4.729590567362228e-05,
+ "loss": 0.6149,
+ "step": 5340
+ },
+ {
+ "epoch": 0.7508771929824561,
+ "grad_norm": 0.6692180633544922,
+ "learning_rate": 4.728591858057827e-05,
+ "loss": 0.6227,
+ "step": 5350
+ },
+ {
+ "epoch": 0.752280701754386,
+ "grad_norm": 0.9368489980697632,
+ "learning_rate": 4.727591413718282e-05,
+ "loss": 0.712,
+ "step": 5360
+ },
+ {
+ "epoch": 0.7536842105263157,
+ "grad_norm": 1.1019880771636963,
+ "learning_rate": 4.7265892351224694e-05,
+ "loss": 0.7172,
+ "step": 5370
+ },
+ {
+ "epoch": 0.7550877192982456,
+ "grad_norm": 0.8168277144432068,
+ "learning_rate": 4.725585323050623e-05,
+ "loss": 0.6812,
+ "step": 5380
+ },
+ {
+ "epoch": 0.7564912280701754,
+ "grad_norm": 1.0383678674697876,
+ "learning_rate": 4.72457967828432e-05,
+ "loss": 0.6266,
+ "step": 5390
+ },
+ {
+ "epoch": 0.7578947368421053,
+ "grad_norm": 1.9418814182281494,
+ "learning_rate": 4.723572301606492e-05,
+ "loss": 0.6976,
+ "step": 5400
+ },
+ {
+ "epoch": 0.7592982456140351,
+ "grad_norm": 1.1380218267440796,
+ "learning_rate": 4.7225631938014134e-05,
+ "loss": 0.709,
+ "step": 5410
+ },
+ {
+ "epoch": 0.7607017543859649,
+ "grad_norm": 0.7876071333885193,
+ "learning_rate": 4.7215523556547116e-05,
+ "loss": 0.5956,
+ "step": 5420
+ },
+ {
+ "epoch": 0.7621052631578947,
+ "grad_norm": 0.9458256363868713,
+ "learning_rate": 4.720539787953357e-05,
+ "loss": 0.6943,
+ "step": 5430
+ },
+ {
+ "epoch": 0.7635087719298246,
+ "grad_norm": 0.6351762413978577,
+ "learning_rate": 4.71952549148567e-05,
+ "loss": 0.6322,
+ "step": 5440
+ },
+ {
+ "epoch": 0.7649122807017544,
+ "grad_norm": 0.8464050889015198,
+ "learning_rate": 4.7185094670413134e-05,
+ "loss": 0.6258,
+ "step": 5450
+ },
+ {
+ "epoch": 0.7663157894736842,
+ "grad_norm": 1.7159314155578613,
+ "learning_rate": 4.7174917154112984e-05,
+ "loss": 0.6347,
+ "step": 5460
+ },
+ {
+ "epoch": 0.767719298245614,
+ "grad_norm": 0.8159227967262268,
+ "learning_rate": 4.716472237387979e-05,
+ "loss": 0.6423,
+ "step": 5470
+ },
+ {
+ "epoch": 0.7691228070175439,
+ "grad_norm": 1.1517149209976196,
+ "learning_rate": 4.715451033765054e-05,
+ "loss": 0.6614,
+ "step": 5480
+ },
+ {
+ "epoch": 0.7705263157894737,
+ "grad_norm": 1.164534091949463,
+ "learning_rate": 4.714428105337565e-05,
+ "loss": 0.6326,
+ "step": 5490
+ },
+ {
+ "epoch": 0.7719298245614035,
+ "grad_norm": 1.0906124114990234,
+ "learning_rate": 4.713403452901898e-05,
+ "loss": 0.6146,
+ "step": 5500
+ },
+ {
+ "epoch": 0.7733333333333333,
+ "grad_norm": 0.7224928140640259,
+ "learning_rate": 4.7123770772557774e-05,
+ "loss": 0.6061,
+ "step": 5510
+ },
+ {
+ "epoch": 0.7747368421052632,
+ "grad_norm": 1.1344630718231201,
+ "learning_rate": 4.711348979198274e-05,
+ "loss": 0.7423,
+ "step": 5520
+ },
+ {
+ "epoch": 0.776140350877193,
+ "grad_norm": 1.0616703033447266,
+ "learning_rate": 4.710319159529798e-05,
+ "loss": 0.6648,
+ "step": 5530
+ },
+ {
+ "epoch": 0.7775438596491228,
+ "grad_norm": 0.8563722968101501,
+ "learning_rate": 4.709287619052098e-05,
+ "loss": 0.551,
+ "step": 5540
+ },
+ {
+ "epoch": 0.7789473684210526,
+ "grad_norm": 0.7541974186897278,
+ "learning_rate": 4.708254358568264e-05,
+ "loss": 0.7394,
+ "step": 5550
+ },
+ {
+ "epoch": 0.7803508771929825,
+ "grad_norm": 0.9201952815055847,
+ "learning_rate": 4.7072193788827236e-05,
+ "loss": 0.57,
+ "step": 5560
+ },
+ {
+ "epoch": 0.7817543859649123,
+ "grad_norm": 0.8615202307701111,
+ "learning_rate": 4.706182680801245e-05,
+ "loss": 0.6293,
+ "step": 5570
+ },
+ {
+ "epoch": 0.783157894736842,
+ "grad_norm": 0.8534351587295532,
+ "learning_rate": 4.705144265130934e-05,
+ "loss": 0.6007,
+ "step": 5580
+ },
+ {
+ "epoch": 0.7845614035087719,
+ "grad_norm": 0.8691478967666626,
+ "learning_rate": 4.704104132680231e-05,
+ "loss": 0.5963,
+ "step": 5590
+ },
+ {
+ "epoch": 0.7859649122807018,
+ "grad_norm": 1.209688663482666,
+ "learning_rate": 4.703062284258916e-05,
+ "loss": 0.7237,
+ "step": 5600
+ },
+ {
+ "epoch": 0.7873684210526316,
+ "grad_norm": 0.72704017162323,
+ "learning_rate": 4.702018720678103e-05,
+ "loss": 0.6452,
+ "step": 5610
+ },
+ {
+ "epoch": 0.7887719298245615,
+ "grad_norm": 1.3118873834609985,
+ "learning_rate": 4.7009734427502426e-05,
+ "loss": 0.6291,
+ "step": 5620
+ },
+ {
+ "epoch": 0.7901754385964912,
+ "grad_norm": 0.6223419308662415,
+ "learning_rate": 4.699926451289119e-05,
+ "loss": 0.5925,
+ "step": 5630
+ },
+ {
+ "epoch": 0.791578947368421,
+ "grad_norm": 1.0733870267868042,
+ "learning_rate": 4.698877747109852e-05,
+ "loss": 0.7342,
+ "step": 5640
+ },
+ {
+ "epoch": 0.7929824561403509,
+ "grad_norm": 0.7960459589958191,
+ "learning_rate": 4.697827331028893e-05,
+ "loss": 0.644,
+ "step": 5650
+ },
+ {
+ "epoch": 0.7943859649122808,
+ "grad_norm": 0.9189769625663757,
+ "learning_rate": 4.6967752038640264e-05,
+ "loss": 0.6567,
+ "step": 5660
+ },
+ {
+ "epoch": 0.7957894736842105,
+ "grad_norm": 1.1323273181915283,
+ "learning_rate": 4.695721366434369e-05,
+ "loss": 0.6873,
+ "step": 5670
+ },
+ {
+ "epoch": 0.7971929824561403,
+ "grad_norm": 0.8580273389816284,
+ "learning_rate": 4.694665819560371e-05,
+ "loss": 0.6733,
+ "step": 5680
+ },
+ {
+ "epoch": 0.7985964912280702,
+ "grad_norm": 1.3165494203567505,
+ "learning_rate": 4.693608564063811e-05,
+ "loss": 0.642,
+ "step": 5690
+ },
+ {
+ "epoch": 0.8,
+ "grad_norm": 0.7017198801040649,
+ "learning_rate": 4.692549600767798e-05,
+ "loss": 0.5438,
+ "step": 5700
+ },
+ {
+ "epoch": 0.8014035087719298,
+ "grad_norm": 0.8478591442108154,
+ "learning_rate": 4.6914889304967725e-05,
+ "loss": 0.6107,
+ "step": 5710
+ },
+ {
+ "epoch": 0.8028070175438596,
+ "grad_norm": 0.9716276526451111,
+ "learning_rate": 4.690426554076501e-05,
+ "loss": 0.5975,
+ "step": 5720
+ },
+ {
+ "epoch": 0.8042105263157895,
+ "grad_norm": 1.0631777048110962,
+ "learning_rate": 4.689362472334082e-05,
+ "loss": 0.6563,
+ "step": 5730
+ },
+ {
+ "epoch": 0.8056140350877193,
+ "grad_norm": 0.9736322164535522,
+ "learning_rate": 4.688296686097937e-05,
+ "loss": 0.6199,
+ "step": 5740
+ },
+ {
+ "epoch": 0.8070175438596491,
+ "grad_norm": 0.9049164652824402,
+ "learning_rate": 4.6872291961978195e-05,
+ "loss": 0.5772,
+ "step": 5750
+ },
+ {
+ "epoch": 0.8084210526315789,
+ "grad_norm": 1.2360827922821045,
+ "learning_rate": 4.6861600034648064e-05,
+ "loss": 0.6401,
+ "step": 5760
+ },
+ {
+ "epoch": 0.8098245614035088,
+ "grad_norm": 1.2036852836608887,
+ "learning_rate": 4.6850891087313e-05,
+ "loss": 0.6087,
+ "step": 5770
+ },
+ {
+ "epoch": 0.8112280701754386,
+ "grad_norm": 1.010108470916748,
+ "learning_rate": 4.6840165128310296e-05,
+ "loss": 0.6973,
+ "step": 5780
+ },
+ {
+ "epoch": 0.8126315789473684,
+ "grad_norm": 1.1753820180892944,
+ "learning_rate": 4.6829422165990475e-05,
+ "loss": 0.6509,
+ "step": 5790
+ },
+ {
+ "epoch": 0.8140350877192982,
+ "grad_norm": 1.0416866540908813,
+ "learning_rate": 4.6818662208717296e-05,
+ "loss": 0.6092,
+ "step": 5800
+ },
+ {
+ "epoch": 0.8154385964912281,
+ "grad_norm": 0.7539423108100891,
+ "learning_rate": 4.680788526486776e-05,
+ "loss": 0.5864,
+ "step": 5810
+ },
+ {
+ "epoch": 0.8168421052631579,
+ "grad_norm": 0.9227228164672852,
+ "learning_rate": 4.679709134283209e-05,
+ "loss": 0.5736,
+ "step": 5820
+ },
+ {
+ "epoch": 0.8182456140350877,
+ "grad_norm": 0.8869969844818115,
+ "learning_rate": 4.678628045101371e-05,
+ "loss": 0.5982,
+ "step": 5830
+ },
+ {
+ "epoch": 0.8196491228070175,
+ "grad_norm": 0.6802515387535095,
+ "learning_rate": 4.677545259782929e-05,
+ "loss": 0.6136,
+ "step": 5840
+ },
+ {
+ "epoch": 0.8210526315789474,
+ "grad_norm": 0.9065477848052979,
+ "learning_rate": 4.676460779170867e-05,
+ "loss": 0.6519,
+ "step": 5850
+ },
+ {
+ "epoch": 0.8224561403508772,
+ "grad_norm": 1.3136307001113892,
+ "learning_rate": 4.675374604109491e-05,
+ "loss": 0.6122,
+ "step": 5860
+ },
+ {
+ "epoch": 0.8238596491228071,
+ "grad_norm": 0.9648601412773132,
+ "learning_rate": 4.6742867354444256e-05,
+ "loss": 0.5582,
+ "step": 5870
+ },
+ {
+ "epoch": 0.8252631578947368,
+ "grad_norm": 1.085227370262146,
+ "learning_rate": 4.673197174022613e-05,
+ "loss": 0.6788,
+ "step": 5880
+ },
+ {
+ "epoch": 0.8266666666666667,
+ "grad_norm": 0.9866172075271606,
+ "learning_rate": 4.672105920692316e-05,
+ "loss": 0.647,
+ "step": 5890
+ },
+ {
+ "epoch": 0.8280701754385965,
+ "grad_norm": 1.5403311252593994,
+ "learning_rate": 4.6710129763031095e-05,
+ "loss": 0.6326,
+ "step": 5900
+ },
+ {
+ "epoch": 0.8294736842105264,
+ "grad_norm": 1.195082187652588,
+ "learning_rate": 4.669918341705891e-05,
+ "loss": 0.6205,
+ "step": 5910
+ },
+ {
+ "epoch": 0.8308771929824561,
+ "grad_norm": 0.9392557740211487,
+ "learning_rate": 4.66882201775287e-05,
+ "loss": 0.699,
+ "step": 5920
+ },
+ {
+ "epoch": 0.832280701754386,
+ "grad_norm": 1.280907392501831,
+ "learning_rate": 4.667724005297573e-05,
+ "loss": 0.6147,
+ "step": 5930
+ },
+ {
+ "epoch": 0.8336842105263158,
+ "grad_norm": 0.6876835823059082,
+ "learning_rate": 4.66662430519484e-05,
+ "loss": 0.5737,
+ "step": 5940
+ },
+ {
+ "epoch": 0.8350877192982457,
+ "grad_norm": 0.7067710161209106,
+ "learning_rate": 4.665522918300823e-05,
+ "loss": 0.6072,
+ "step": 5950
+ },
+ {
+ "epoch": 0.8364912280701754,
+ "grad_norm": 1.0336652994155884,
+ "learning_rate": 4.6644198454729933e-05,
+ "loss": 0.6296,
+ "step": 5960
+ },
+ {
+ "epoch": 0.8378947368421052,
+ "grad_norm": 1.3756647109985352,
+ "learning_rate": 4.663315087570128e-05,
+ "loss": 0.6489,
+ "step": 5970
+ },
+ {
+ "epoch": 0.8392982456140351,
+ "grad_norm": 1.0433988571166992,
+ "learning_rate": 4.662208645452321e-05,
+ "loss": 0.6742,
+ "step": 5980
+ },
+ {
+ "epoch": 0.840701754385965,
+ "grad_norm": 0.6354380249977112,
+ "learning_rate": 4.661100519980973e-05,
+ "loss": 0.573,
+ "step": 5990
+ },
+ {
+ "epoch": 0.8421052631578947,
+ "grad_norm": 1.112243890762329,
+ "learning_rate": 4.6599907120188005e-05,
+ "loss": 0.6455,
+ "step": 6000
+ },
+ {
+ "epoch": 0.8421052631578947,
+ "eval_loss": 0.6415141820907593,
+ "eval_runtime": 44.3465,
+ "eval_samples_per_second": 33.825,
+ "eval_steps_per_second": 8.456,
+ "step": 6000
+ },
+ {
+ "epoch": 0.8435087719298245,
+ "grad_norm": 0.781201958656311,
+ "learning_rate": 4.658879222429825e-05,
+ "loss": 0.5362,
+ "step": 6010
+ },
+ {
+ "epoch": 0.8449122807017544,
+ "grad_norm": 1.069032073020935,
+ "learning_rate": 4.65776605207938e-05,
+ "loss": 0.6321,
+ "step": 6020
+ },
+ {
+ "epoch": 0.8463157894736842,
+ "grad_norm": 1.0449451208114624,
+ "learning_rate": 4.656651201834106e-05,
+ "loss": 0.6208,
+ "step": 6030
+ },
+ {
+ "epoch": 0.847719298245614,
+ "grad_norm": 1.9674957990646362,
+ "learning_rate": 4.655534672561953e-05,
+ "loss": 0.6529,
+ "step": 6040
+ },
+ {
+ "epoch": 0.8491228070175438,
+ "grad_norm": 0.9335805773735046,
+ "learning_rate": 4.654416465132177e-05,
+ "loss": 0.6515,
+ "step": 6050
+ },
+ {
+ "epoch": 0.8505263157894737,
+ "grad_norm": 0.8951327800750732,
+ "learning_rate": 4.6532965804153416e-05,
+ "loss": 0.613,
+ "step": 6060
+ },
+ {
+ "epoch": 0.8519298245614035,
+ "grad_norm": 1.1679803133010864,
+ "learning_rate": 4.652175019283314e-05,
+ "loss": 0.6215,
+ "step": 6070
+ },
+ {
+ "epoch": 0.8533333333333334,
+ "grad_norm": 1.6771854162216187,
+ "learning_rate": 4.6510517826092695e-05,
+ "loss": 0.7427,
+ "step": 6080
+ },
+ {
+ "epoch": 0.8547368421052631,
+ "grad_norm": 0.6942294836044312,
+ "learning_rate": 4.649926871267685e-05,
+ "loss": 0.58,
+ "step": 6090
+ },
+ {
+ "epoch": 0.856140350877193,
+ "grad_norm": 1.114723801612854,
+ "learning_rate": 4.6488002861343425e-05,
+ "loss": 0.6916,
+ "step": 6100
+ },
+ {
+ "epoch": 0.8575438596491228,
+ "grad_norm": 0.9489352107048035,
+ "learning_rate": 4.647672028086328e-05,
+ "loss": 0.6073,
+ "step": 6110
+ },
+ {
+ "epoch": 0.8589473684210527,
+ "grad_norm": 0.8159108757972717,
+ "learning_rate": 4.646542098002029e-05,
+ "loss": 0.6273,
+ "step": 6120
+ },
+ {
+ "epoch": 0.8603508771929824,
+ "grad_norm": 1.2675360441207886,
+ "learning_rate": 4.645410496761135e-05,
+ "loss": 0.6657,
+ "step": 6130
+ },
+ {
+ "epoch": 0.8617543859649123,
+ "grad_norm": 0.9706358313560486,
+ "learning_rate": 4.644277225244635e-05,
+ "loss": 0.6861,
+ "step": 6140
+ },
+ {
+ "epoch": 0.8631578947368421,
+ "grad_norm": 1.0342049598693848,
+ "learning_rate": 4.6431422843348216e-05,
+ "loss": 0.6834,
+ "step": 6150
+ },
+ {
+ "epoch": 0.864561403508772,
+ "grad_norm": 0.9016236066818237,
+ "learning_rate": 4.642005674915284e-05,
+ "loss": 0.6098,
+ "step": 6160
+ },
+ {
+ "epoch": 0.8659649122807017,
+ "grad_norm": 0.8684419989585876,
+ "learning_rate": 4.640867397870912e-05,
+ "loss": 0.6831,
+ "step": 6170
+ },
+ {
+ "epoch": 0.8673684210526316,
+ "grad_norm": 0.8743478059768677,
+ "learning_rate": 4.639727454087892e-05,
+ "loss": 0.5846,
+ "step": 6180
+ },
+ {
+ "epoch": 0.8687719298245614,
+ "grad_norm": 1.0925372838974,
+ "learning_rate": 4.638585844453711e-05,
+ "loss": 0.6436,
+ "step": 6190
+ },
+ {
+ "epoch": 0.8701754385964913,
+ "grad_norm": 1.0224460363388062,
+ "learning_rate": 4.6374425698571514e-05,
+ "loss": 0.7538,
+ "step": 6200
+ },
+ {
+ "epoch": 0.871578947368421,
+ "grad_norm": 0.8540046215057373,
+ "learning_rate": 4.63629763118829e-05,
+ "loss": 0.596,
+ "step": 6210
+ },
+ {
+ "epoch": 0.8729824561403509,
+ "grad_norm": 0.9685525298118591,
+ "learning_rate": 4.6351510293385026e-05,
+ "loss": 0.5844,
+ "step": 6220
+ },
+ {
+ "epoch": 0.8743859649122807,
+ "grad_norm": 0.9988105893135071,
+ "learning_rate": 4.634002765200456e-05,
+ "loss": 0.5785,
+ "step": 6230
+ },
+ {
+ "epoch": 0.8757894736842106,
+ "grad_norm": 0.7331526279449463,
+ "learning_rate": 4.632852839668115e-05,
+ "loss": 0.5728,
+ "step": 6240
+ },
+ {
+ "epoch": 0.8771929824561403,
+ "grad_norm": 1.0520068407058716,
+ "learning_rate": 4.6317012536367354e-05,
+ "loss": 0.6317,
+ "step": 6250
+ },
+ {
+ "epoch": 0.8785964912280702,
+ "grad_norm": 1.117604374885559,
+ "learning_rate": 4.630548008002866e-05,
+ "loss": 0.6152,
+ "step": 6260
+ },
+ {
+ "epoch": 0.88,
+ "grad_norm": 0.7635726928710938,
+ "learning_rate": 4.629393103664349e-05,
+ "loss": 0.64,
+ "step": 6270
+ },
+ {
+ "epoch": 0.8814035087719299,
+ "grad_norm": 1.1754323244094849,
+ "learning_rate": 4.6282365415203164e-05,
+ "loss": 0.5923,
+ "step": 6280
+ },
+ {
+ "epoch": 0.8828070175438596,
+ "grad_norm": 0.6220813989639282,
+ "learning_rate": 4.627078322471191e-05,
+ "loss": 0.6745,
+ "step": 6290
+ },
+ {
+ "epoch": 0.8842105263157894,
+ "grad_norm": 0.8440349698066711,
+ "learning_rate": 4.625918447418687e-05,
+ "loss": 0.5819,
+ "step": 6300
+ },
+ {
+ "epoch": 0.8856140350877193,
+ "grad_norm": 1.0416796207427979,
+ "learning_rate": 4.624756917265807e-05,
+ "loss": 0.5965,
+ "step": 6310
+ },
+ {
+ "epoch": 0.8870175438596491,
+ "grad_norm": 1.1395715475082397,
+ "learning_rate": 4.62359373291684e-05,
+ "loss": 0.5838,
+ "step": 6320
+ },
+ {
+ "epoch": 0.888421052631579,
+ "grad_norm": 1.3868945837020874,
+ "learning_rate": 4.622428895277367e-05,
+ "loss": 0.7304,
+ "step": 6330
+ },
+ {
+ "epoch": 0.8898245614035087,
+ "grad_norm": 1.0000405311584473,
+ "learning_rate": 4.621262405254253e-05,
+ "loss": 0.5938,
+ "step": 6340
+ },
+ {
+ "epoch": 0.8912280701754386,
+ "grad_norm": 0.754399836063385,
+ "learning_rate": 4.620094263755652e-05,
+ "loss": 0.6276,
+ "step": 6350
+ },
+ {
+ "epoch": 0.8926315789473684,
+ "grad_norm": 0.9784127473831177,
+ "learning_rate": 4.618924471691e-05,
+ "loss": 0.613,
+ "step": 6360
+ },
+ {
+ "epoch": 0.8940350877192983,
+ "grad_norm": 0.6419925689697266,
+ "learning_rate": 4.617753029971021e-05,
+ "loss": 0.599,
+ "step": 6370
+ },
+ {
+ "epoch": 0.895438596491228,
+ "grad_norm": 1.2562180757522583,
+ "learning_rate": 4.6165799395077236e-05,
+ "loss": 0.6358,
+ "step": 6380
+ },
+ {
+ "epoch": 0.8968421052631579,
+ "grad_norm": 1.1815166473388672,
+ "learning_rate": 4.615405201214398e-05,
+ "loss": 0.6747,
+ "step": 6390
+ },
+ {
+ "epoch": 0.8982456140350877,
+ "grad_norm": 1.5243850946426392,
+ "learning_rate": 4.614228816005618e-05,
+ "loss": 0.6082,
+ "step": 6400
+ },
+ {
+ "epoch": 0.8996491228070176,
+ "grad_norm": 0.894396960735321,
+ "learning_rate": 4.61305078479724e-05,
+ "loss": 0.5506,
+ "step": 6410
+ },
+ {
+ "epoch": 0.9010526315789473,
+ "grad_norm": 0.7782644629478455,
+ "learning_rate": 4.611871108506403e-05,
+ "loss": 0.5816,
+ "step": 6420
+ },
+ {
+ "epoch": 0.9024561403508772,
+ "grad_norm": 1.2209144830703735,
+ "learning_rate": 4.610689788051523e-05,
+ "loss": 0.6178,
+ "step": 6430
+ },
+ {
+ "epoch": 0.903859649122807,
+ "grad_norm": 0.8224475979804993,
+ "learning_rate": 4.6095068243523e-05,
+ "loss": 0.644,
+ "step": 6440
+ },
+ {
+ "epoch": 0.9052631578947369,
+ "grad_norm": 1.054763674736023,
+ "learning_rate": 4.608322218329711e-05,
+ "loss": 0.5564,
+ "step": 6450
+ },
+ {
+ "epoch": 0.9066666666666666,
+ "grad_norm": 0.9225585460662842,
+ "learning_rate": 4.607135970906014e-05,
+ "loss": 0.6534,
+ "step": 6460
+ },
+ {
+ "epoch": 0.9080701754385965,
+ "grad_norm": 0.7979352474212646,
+ "learning_rate": 4.605948083004741e-05,
+ "loss": 0.6671,
+ "step": 6470
+ },
+ {
+ "epoch": 0.9094736842105263,
+ "grad_norm": 1.223375916481018,
+ "learning_rate": 4.6047585555507045e-05,
+ "loss": 0.6996,
+ "step": 6480
+ },
+ {
+ "epoch": 0.9108771929824562,
+ "grad_norm": 0.803092360496521,
+ "learning_rate": 4.603567389469993e-05,
+ "loss": 0.5937,
+ "step": 6490
+ },
+ {
+ "epoch": 0.9122807017543859,
+ "grad_norm": 0.9455748796463013,
+ "learning_rate": 4.60237458568997e-05,
+ "loss": 0.555,
+ "step": 6500
+ },
+ {
+ "epoch": 0.9136842105263158,
+ "grad_norm": 1.0315808057785034,
+ "learning_rate": 4.6011801451392736e-05,
+ "loss": 0.6721,
+ "step": 6510
+ },
+ {
+ "epoch": 0.9150877192982456,
+ "grad_norm": 1.0508462190628052,
+ "learning_rate": 4.5999840687478167e-05,
+ "loss": 0.5849,
+ "step": 6520
+ },
+ {
+ "epoch": 0.9164912280701755,
+ "grad_norm": 0.8299797773361206,
+ "learning_rate": 4.598786357446786e-05,
+ "loss": 0.6013,
+ "step": 6530
+ },
+ {
+ "epoch": 0.9178947368421052,
+ "grad_norm": 0.8177257776260376,
+ "learning_rate": 4.5975870121686406e-05,
+ "loss": 0.6178,
+ "step": 6540
+ },
+ {
+ "epoch": 0.9192982456140351,
+ "grad_norm": 0.8297099471092224,
+ "learning_rate": 4.596386033847111e-05,
+ "loss": 0.5985,
+ "step": 6550
+ },
+ {
+ "epoch": 0.9207017543859649,
+ "grad_norm": 1.1290909051895142,
+ "learning_rate": 4.5951834234172025e-05,
+ "loss": 0.5878,
+ "step": 6560
+ },
+ {
+ "epoch": 0.9221052631578948,
+ "grad_norm": 0.7982479929924011,
+ "learning_rate": 4.593979181815187e-05,
+ "loss": 0.7004,
+ "step": 6570
+ },
+ {
+ "epoch": 0.9235087719298246,
+ "grad_norm": 0.9358506202697754,
+ "learning_rate": 4.5927733099786066e-05,
+ "loss": 0.615,
+ "step": 6580
+ },
+ {
+ "epoch": 0.9249122807017544,
+ "grad_norm": 0.8648248910903931,
+ "learning_rate": 4.591565808846276e-05,
+ "loss": 0.5789,
+ "step": 6590
+ },
+ {
+ "epoch": 0.9263157894736842,
+ "grad_norm": 1.1170403957366943,
+ "learning_rate": 4.5903566793582755e-05,
+ "loss": 0.5827,
+ "step": 6600
+ },
+ {
+ "epoch": 0.927719298245614,
+ "grad_norm": 1.5100983381271362,
+ "learning_rate": 4.589145922455954e-05,
+ "loss": 0.6176,
+ "step": 6610
+ },
+ {
+ "epoch": 0.9291228070175439,
+ "grad_norm": 1.2393382787704468,
+ "learning_rate": 4.587933539081927e-05,
+ "loss": 0.7056,
+ "step": 6620
+ },
+ {
+ "epoch": 0.9305263157894736,
+ "grad_norm": 0.8480477333068848,
+ "learning_rate": 4.586719530180075e-05,
+ "loss": 0.6418,
+ "step": 6630
+ },
+ {
+ "epoch": 0.9319298245614035,
+ "grad_norm": 1.1262218952178955,
+ "learning_rate": 4.585503896695549e-05,
+ "loss": 0.5231,
+ "step": 6640
+ },
+ {
+ "epoch": 0.9333333333333333,
+ "grad_norm": 1.1744680404663086,
+ "learning_rate": 4.584286639574758e-05,
+ "loss": 0.7463,
+ "step": 6650
+ },
+ {
+ "epoch": 0.9347368421052632,
+ "grad_norm": 1.0590097904205322,
+ "learning_rate": 4.58306775976538e-05,
+ "loss": 0.6373,
+ "step": 6660
+ },
+ {
+ "epoch": 0.9361403508771929,
+ "grad_norm": 1.1323457956314087,
+ "learning_rate": 4.581847258216355e-05,
+ "loss": 0.6609,
+ "step": 6670
+ },
+ {
+ "epoch": 0.9375438596491228,
+ "grad_norm": 1.043113350868225,
+ "learning_rate": 4.580625135877884e-05,
+ "loss": 0.6366,
+ "step": 6680
+ },
+ {
+ "epoch": 0.9389473684210526,
+ "grad_norm": 0.6503088474273682,
+ "learning_rate": 4.5794013937014326e-05,
+ "loss": 0.6359,
+ "step": 6690
+ },
+ {
+ "epoch": 0.9403508771929825,
+ "grad_norm": 1.6966040134429932,
+ "learning_rate": 4.578176032639724e-05,
+ "loss": 0.7239,
+ "step": 6700
+ },
+ {
+ "epoch": 0.9417543859649122,
+ "grad_norm": 0.7537420988082886,
+ "learning_rate": 4.5769490536467465e-05,
+ "loss": 0.5848,
+ "step": 6710
+ },
+ {
+ "epoch": 0.9431578947368421,
+ "grad_norm": 1.1561657190322876,
+ "learning_rate": 4.5757204576777437e-05,
+ "loss": 0.6155,
+ "step": 6720
+ },
+ {
+ "epoch": 0.9445614035087719,
+ "grad_norm": 0.6912992000579834,
+ "learning_rate": 4.574490245689219e-05,
+ "loss": 0.5817,
+ "step": 6730
+ },
+ {
+ "epoch": 0.9459649122807018,
+ "grad_norm": 1.0430986881256104,
+ "learning_rate": 4.573258418638936e-05,
+ "loss": 0.4903,
+ "step": 6740
+ },
+ {
+ "epoch": 0.9473684210526315,
+ "grad_norm": 0.9669928550720215,
+ "learning_rate": 4.572024977485914e-05,
+ "loss": 0.6289,
+ "step": 6750
+ },
+ {
+ "epoch": 0.9487719298245614,
+ "grad_norm": 0.9165218472480774,
+ "learning_rate": 4.5707899231904286e-05,
+ "loss": 0.6133,
+ "step": 6760
+ },
+ {
+ "epoch": 0.9501754385964912,
+ "grad_norm": 0.9709174036979675,
+ "learning_rate": 4.569553256714012e-05,
+ "loss": 0.638,
+ "step": 6770
+ },
+ {
+ "epoch": 0.9515789473684211,
+ "grad_norm": 1.2939212322235107,
+ "learning_rate": 4.5683149790194526e-05,
+ "loss": 0.7599,
+ "step": 6780
+ },
+ {
+ "epoch": 0.9529824561403509,
+ "grad_norm": 1.0535619258880615,
+ "learning_rate": 4.5670750910707903e-05,
+ "loss": 0.6906,
+ "step": 6790
+ },
+ {
+ "epoch": 0.9543859649122807,
+ "grad_norm": 0.9928086400032043,
+ "learning_rate": 4.565833593833321e-05,
+ "loss": 0.7387,
+ "step": 6800
+ },
+ {
+ "epoch": 0.9557894736842105,
+ "grad_norm": 0.7087190747261047,
+ "learning_rate": 4.5645904882735935e-05,
+ "loss": 0.5566,
+ "step": 6810
+ },
+ {
+ "epoch": 0.9571929824561404,
+ "grad_norm": 1.2111977338790894,
+ "learning_rate": 4.563345775359408e-05,
+ "loss": 0.5748,
+ "step": 6820
+ },
+ {
+ "epoch": 0.9585964912280702,
+ "grad_norm": 1.2516281604766846,
+ "learning_rate": 4.562099456059815e-05,
+ "loss": 0.6256,
+ "step": 6830
+ },
+ {
+ "epoch": 0.96,
+ "grad_norm": 0.8081939220428467,
+ "learning_rate": 4.5608515313451186e-05,
+ "loss": 0.5826,
+ "step": 6840
+ },
+ {
+ "epoch": 0.9614035087719298,
+ "grad_norm": 1.453393578529358,
+ "learning_rate": 4.559602002186869e-05,
+ "loss": 0.5538,
+ "step": 6850
+ },
+ {
+ "epoch": 0.9628070175438597,
+ "grad_norm": 1.139618158340454,
+ "learning_rate": 4.558350869557868e-05,
+ "loss": 0.6514,
+ "step": 6860
+ },
+ {
+ "epoch": 0.9642105263157895,
+ "grad_norm": 0.9846227765083313,
+ "learning_rate": 4.557098134432167e-05,
+ "loss": 0.7813,
+ "step": 6870
+ },
+ {
+ "epoch": 0.9656140350877193,
+ "grad_norm": 0.8734840750694275,
+ "learning_rate": 4.555843797785061e-05,
+ "loss": 0.5993,
+ "step": 6880
+ },
+ {
+ "epoch": 0.9670175438596491,
+ "grad_norm": 1.172455072402954,
+ "learning_rate": 4.554587860593095e-05,
+ "loss": 0.5594,
+ "step": 6890
+ },
+ {
+ "epoch": 0.968421052631579,
+ "grad_norm": 0.5644223690032959,
+ "learning_rate": 4.553330323834059e-05,
+ "loss": 0.5581,
+ "step": 6900
+ },
+ {
+ "epoch": 0.9698245614035088,
+ "grad_norm": 0.7265453338623047,
+ "learning_rate": 4.552071188486989e-05,
+ "loss": 0.6295,
+ "step": 6910
+ },
+ {
+ "epoch": 0.9712280701754386,
+ "grad_norm": 0.8341143727302551,
+ "learning_rate": 4.550810455532164e-05,
+ "loss": 0.5697,
+ "step": 6920
+ },
+ {
+ "epoch": 0.9726315789473684,
+ "grad_norm": 0.7036447525024414,
+ "learning_rate": 4.5495481259511095e-05,
+ "loss": 0.5933,
+ "step": 6930
+ },
+ {
+ "epoch": 0.9740350877192983,
+ "grad_norm": 0.972158670425415,
+ "learning_rate": 4.54828420072659e-05,
+ "loss": 0.5723,
+ "step": 6940
+ },
+ {
+ "epoch": 0.9754385964912281,
+ "grad_norm": 1.3979262113571167,
+ "learning_rate": 4.547018680842616e-05,
+ "loss": 0.5749,
+ "step": 6950
+ },
+ {
+ "epoch": 0.9768421052631578,
+ "grad_norm": 1.3824502229690552,
+ "learning_rate": 4.545751567284439e-05,
+ "loss": 0.7053,
+ "step": 6960
+ },
+ {
+ "epoch": 0.9782456140350877,
+ "grad_norm": 1.1198428869247437,
+ "learning_rate": 4.5444828610385486e-05,
+ "loss": 0.629,
+ "step": 6970
+ },
+ {
+ "epoch": 0.9796491228070175,
+ "grad_norm": 0.7075201869010925,
+ "learning_rate": 4.543212563092677e-05,
+ "loss": 0.6647,
+ "step": 6980
+ },
+ {
+ "epoch": 0.9810526315789474,
+ "grad_norm": 1.0392223596572876,
+ "learning_rate": 4.541940674435794e-05,
+ "loss": 0.6921,
+ "step": 6990
+ },
+ {
+ "epoch": 0.9824561403508771,
+ "grad_norm": 1.027004599571228,
+ "learning_rate": 4.5406671960581096e-05,
+ "loss": 0.6187,
+ "step": 7000
+ },
+ {
+ "epoch": 0.983859649122807,
+ "grad_norm": 1.2175973653793335,
+ "learning_rate": 4.53939212895107e-05,
+ "loss": 0.6471,
+ "step": 7010
+ },
+ {
+ "epoch": 0.9852631578947368,
+ "grad_norm": 1.0904464721679688,
+ "learning_rate": 4.538115474107357e-05,
+ "loss": 0.5916,
+ "step": 7020
+ },
+ {
+ "epoch": 0.9866666666666667,
+ "grad_norm": 1.0575454235076904,
+ "learning_rate": 4.536837232520893e-05,
+ "loss": 0.6859,
+ "step": 7030
+ },
+ {
+ "epoch": 0.9880701754385965,
+ "grad_norm": 0.7100856304168701,
+ "learning_rate": 4.535557405186831e-05,
+ "loss": 0.64,
+ "step": 7040
+ },
+ {
+ "epoch": 0.9894736842105263,
+ "grad_norm": 0.9754576683044434,
+ "learning_rate": 4.534275993101561e-05,
+ "loss": 0.5839,
+ "step": 7050
+ },
+ {
+ "epoch": 0.9908771929824561,
+ "grad_norm": 0.8776949644088745,
+ "learning_rate": 4.532992997262706e-05,
+ "loss": 0.577,
+ "step": 7060
+ },
+ {
+ "epoch": 0.992280701754386,
+ "grad_norm": 1.569716215133667,
+ "learning_rate": 4.531708418669122e-05,
+ "loss": 0.7155,
+ "step": 7070
+ },
+ {
+ "epoch": 0.9936842105263158,
+ "grad_norm": 0.7174299955368042,
+ "learning_rate": 4.5304222583208983e-05,
+ "loss": 0.6713,
+ "step": 7080
+ },
+ {
+ "epoch": 0.9950877192982456,
+ "grad_norm": 1.0695897340774536,
+ "learning_rate": 4.5291345172193546e-05,
+ "loss": 0.6528,
+ "step": 7090
+ },
+ {
+ "epoch": 0.9964912280701754,
+ "grad_norm": 1.0685267448425293,
+ "learning_rate": 4.5278451963670403e-05,
+ "loss": 0.5705,
+ "step": 7100
+ },
+ {
+ "epoch": 0.9978947368421053,
+ "grad_norm": 1.2662453651428223,
+ "learning_rate": 4.526554296767738e-05,
+ "loss": 0.6763,
+ "step": 7110
+ },
+ {
+ "epoch": 0.9992982456140351,
+ "grad_norm": 1.1944515705108643,
+ "learning_rate": 4.525261819426455e-05,
+ "loss": 0.5855,
+ "step": 7120
+ },
+ {
+ "epoch": 1.0007017543859649,
+ "grad_norm": 0.7510038614273071,
+ "learning_rate": 4.5239677653494305e-05,
+ "loss": 0.5631,
+ "step": 7130
+ },
+ {
+ "epoch": 1.0021052631578948,
+ "grad_norm": 0.7383008003234863,
+ "learning_rate": 4.5226721355441306e-05,
+ "loss": 0.493,
+ "step": 7140
+ },
+ {
+ "epoch": 1.0035087719298246,
+ "grad_norm": 1.3719711303710938,
+ "learning_rate": 4.5213749310192455e-05,
+ "loss": 0.5049,
+ "step": 7150
+ },
+ {
+ "epoch": 1.0049122807017543,
+ "grad_norm": 0.7755573987960815,
+ "learning_rate": 4.520076152784695e-05,
+ "loss": 0.5204,
+ "step": 7160
+ },
+ {
+ "epoch": 1.0063157894736843,
+ "grad_norm": 1.0142574310302734,
+ "learning_rate": 4.518775801851622e-05,
+ "loss": 0.5395,
+ "step": 7170
+ },
+ {
+ "epoch": 1.007719298245614,
+ "grad_norm": 1.0278340578079224,
+ "learning_rate": 4.517473879232395e-05,
+ "loss": 0.5231,
+ "step": 7180
+ },
+ {
+ "epoch": 1.0091228070175438,
+ "grad_norm": 1.4633328914642334,
+ "learning_rate": 4.516170385940603e-05,
+ "loss": 0.5764,
+ "step": 7190
+ },
+ {
+ "epoch": 1.0105263157894737,
+ "grad_norm": 1.5323199033737183,
+ "learning_rate": 4.514865322991063e-05,
+ "loss": 0.5339,
+ "step": 7200
+ },
+ {
+ "epoch": 1.0119298245614035,
+ "grad_norm": 1.379055380821228,
+ "learning_rate": 4.51355869139981e-05,
+ "loss": 0.5684,
+ "step": 7210
+ },
+ {
+ "epoch": 1.0133333333333334,
+ "grad_norm": 0.9581233859062195,
+ "learning_rate": 4.512250492184101e-05,
+ "loss": 0.539,
+ "step": 7220
+ },
+ {
+ "epoch": 1.0147368421052632,
+ "grad_norm": 1.1807743310928345,
+ "learning_rate": 4.510940726362416e-05,
+ "loss": 0.5348,
+ "step": 7230
+ },
+ {
+ "epoch": 1.016140350877193,
+ "grad_norm": 1.2164653539657593,
+ "learning_rate": 4.50962939495445e-05,
+ "loss": 0.5795,
+ "step": 7240
+ },
+ {
+ "epoch": 1.0175438596491229,
+ "grad_norm": 0.7895627617835999,
+ "learning_rate": 4.50831649898112e-05,
+ "loss": 0.5894,
+ "step": 7250
+ },
+ {
+ "epoch": 1.0189473684210526,
+ "grad_norm": 1.4003938436508179,
+ "learning_rate": 4.507002039464562e-05,
+ "loss": 0.5825,
+ "step": 7260
+ },
+ {
+ "epoch": 1.0203508771929826,
+ "grad_norm": 0.8824437856674194,
+ "learning_rate": 4.505686017428127e-05,
+ "loss": 0.5513,
+ "step": 7270
+ },
+ {
+ "epoch": 1.0217543859649123,
+ "grad_norm": 1.0241267681121826,
+ "learning_rate": 4.504368433896382e-05,
+ "loss": 0.6675,
+ "step": 7280
+ },
+ {
+ "epoch": 1.023157894736842,
+ "grad_norm": 1.1416174173355103,
+ "learning_rate": 4.5030492898951134e-05,
+ "loss": 0.5146,
+ "step": 7290
+ },
+ {
+ "epoch": 1.024561403508772,
+ "grad_norm": 1.4306304454803467,
+ "learning_rate": 4.501728586451318e-05,
+ "loss": 0.6254,
+ "step": 7300
+ },
+ {
+ "epoch": 1.0259649122807017,
+ "grad_norm": 0.7919867634773254,
+ "learning_rate": 4.5004063245932097e-05,
+ "loss": 0.4688,
+ "step": 7310
+ },
+ {
+ "epoch": 1.0273684210526315,
+ "grad_norm": 1.0270862579345703,
+ "learning_rate": 4.4990825053502136e-05,
+ "loss": 0.5227,
+ "step": 7320
+ },
+ {
+ "epoch": 1.0287719298245614,
+ "grad_norm": 1.332261085510254,
+ "learning_rate": 4.497757129752969e-05,
+ "loss": 0.5219,
+ "step": 7330
+ },
+ {
+ "epoch": 1.0301754385964912,
+ "grad_norm": 1.1045178174972534,
+ "learning_rate": 4.496430198833327e-05,
+ "loss": 0.5006,
+ "step": 7340
+ },
+ {
+ "epoch": 1.0315789473684212,
+ "grad_norm": 1.069557547569275,
+ "learning_rate": 4.495101713624348e-05,
+ "loss": 0.519,
+ "step": 7350
+ },
+ {
+ "epoch": 1.032982456140351,
+ "grad_norm": 1.0084444284439087,
+ "learning_rate": 4.493771675160303e-05,
+ "loss": 0.6042,
+ "step": 7360
+ },
+ {
+ "epoch": 1.0343859649122806,
+ "grad_norm": 1.1198923587799072,
+ "learning_rate": 4.4924400844766734e-05,
+ "loss": 0.5092,
+ "step": 7370
+ },
+ {
+ "epoch": 1.0357894736842106,
+ "grad_norm": 0.7310131788253784,
+ "learning_rate": 4.491106942610147e-05,
+ "loss": 0.6178,
+ "step": 7380
+ },
+ {
+ "epoch": 1.0371929824561403,
+ "grad_norm": 1.1642857789993286,
+ "learning_rate": 4.489772250598622e-05,
+ "loss": 0.6226,
+ "step": 7390
+ },
+ {
+ "epoch": 1.03859649122807,
+ "grad_norm": 1.194806456565857,
+ "learning_rate": 4.488436009481201e-05,
+ "loss": 0.5821,
+ "step": 7400
+ },
+ {
+ "epoch": 1.04,
+ "grad_norm": 1.3842540979385376,
+ "learning_rate": 4.487098220298193e-05,
+ "loss": 0.5265,
+ "step": 7410
+ },
+ {
+ "epoch": 1.0414035087719298,
+ "grad_norm": 1.546705722808838,
+ "learning_rate": 4.485758884091113e-05,
+ "loss": 0.5617,
+ "step": 7420
+ },
+ {
+ "epoch": 1.0428070175438597,
+ "grad_norm": 1.5181100368499756,
+ "learning_rate": 4.4844180019026805e-05,
+ "loss": 0.5468,
+ "step": 7430
+ },
+ {
+ "epoch": 1.0442105263157895,
+ "grad_norm": 1.8832321166992188,
+ "learning_rate": 4.483075574776819e-05,
+ "loss": 0.5048,
+ "step": 7440
+ },
+ {
+ "epoch": 1.0456140350877192,
+ "grad_norm": 1.2133930921554565,
+ "learning_rate": 4.4817316037586524e-05,
+ "loss": 0.5684,
+ "step": 7450
+ },
+ {
+ "epoch": 1.0470175438596492,
+ "grad_norm": 1.6424169540405273,
+ "learning_rate": 4.480386089894509e-05,
+ "loss": 0.5851,
+ "step": 7460
+ },
+ {
+ "epoch": 1.048421052631579,
+ "grad_norm": 1.2695761919021606,
+ "learning_rate": 4.479039034231918e-05,
+ "loss": 0.5308,
+ "step": 7470
+ },
+ {
+ "epoch": 1.0498245614035087,
+ "grad_norm": 1.3531373739242554,
+ "learning_rate": 4.477690437819607e-05,
+ "loss": 0.5904,
+ "step": 7480
+ },
+ {
+ "epoch": 1.0512280701754386,
+ "grad_norm": 1.6086102724075317,
+ "learning_rate": 4.476340301707507e-05,
+ "loss": 0.4894,
+ "step": 7490
+ },
+ {
+ "epoch": 1.0526315789473684,
+ "grad_norm": 0.7145791053771973,
+ "learning_rate": 4.4749886269467416e-05,
+ "loss": 0.4906,
+ "step": 7500
+ },
+ {
+ "epoch": 1.0540350877192983,
+ "grad_norm": 1.5852210521697998,
+ "learning_rate": 4.473635414589639e-05,
+ "loss": 0.5399,
+ "step": 7510
+ },
+ {
+ "epoch": 1.055438596491228,
+ "grad_norm": 1.3225674629211426,
+ "learning_rate": 4.47228066568972e-05,
+ "loss": 0.5168,
+ "step": 7520
+ },
+ {
+ "epoch": 1.0568421052631578,
+ "grad_norm": 1.3213186264038086,
+ "learning_rate": 4.470924381301704e-05,
+ "loss": 0.4888,
+ "step": 7530
+ },
+ {
+ "epoch": 1.0582456140350878,
+ "grad_norm": 1.4983114004135132,
+ "learning_rate": 4.469566562481503e-05,
+ "loss": 0.4909,
+ "step": 7540
+ },
+ {
+ "epoch": 1.0596491228070175,
+ "grad_norm": 1.3175050020217896,
+ "learning_rate": 4.4682072102862286e-05,
+ "loss": 0.5369,
+ "step": 7550
+ },
+ {
+ "epoch": 1.0610526315789475,
+ "grad_norm": 1.14377760887146,
+ "learning_rate": 4.466846325774179e-05,
+ "loss": 0.5046,
+ "step": 7560
+ },
+ {
+ "epoch": 1.0624561403508772,
+ "grad_norm": 0.6691097021102905,
+ "learning_rate": 4.4654839100048535e-05,
+ "loss": 0.5201,
+ "step": 7570
+ },
+ {
+ "epoch": 1.063859649122807,
+ "grad_norm": 1.4467300176620483,
+ "learning_rate": 4.464119964038937e-05,
+ "loss": 0.5238,
+ "step": 7580
+ },
+ {
+ "epoch": 1.065263157894737,
+ "grad_norm": 0.8880655169487,
+ "learning_rate": 4.462754488938309e-05,
+ "loss": 0.5074,
+ "step": 7590
+ },
+ {
+ "epoch": 1.0666666666666667,
+ "grad_norm": 2.043294906616211,
+ "learning_rate": 4.4613874857660384e-05,
+ "loss": 0.5297,
+ "step": 7600
+ },
+ {
+ "epoch": 1.0680701754385964,
+ "grad_norm": 1.054681420326233,
+ "learning_rate": 4.460018955586384e-05,
+ "loss": 0.5585,
+ "step": 7610
+ },
+ {
+ "epoch": 1.0694736842105264,
+ "grad_norm": 1.256369709968567,
+ "learning_rate": 4.458648899464793e-05,
+ "loss": 0.4944,
+ "step": 7620
+ },
+ {
+ "epoch": 1.070877192982456,
+ "grad_norm": 1.0441490411758423,
+ "learning_rate": 4.457277318467903e-05,
+ "loss": 0.5736,
+ "step": 7630
+ },
+ {
+ "epoch": 1.072280701754386,
+ "grad_norm": 0.885286271572113,
+ "learning_rate": 4.4559042136635345e-05,
+ "loss": 0.6152,
+ "step": 7640
+ },
+ {
+ "epoch": 1.0736842105263158,
+ "grad_norm": 1.8804951906204224,
+ "learning_rate": 4.4545295861206975e-05,
+ "loss": 0.4936,
+ "step": 7650
+ },
+ {
+ "epoch": 1.0750877192982455,
+ "grad_norm": 1.5045465230941772,
+ "learning_rate": 4.453153436909587e-05,
+ "loss": 0.5547,
+ "step": 7660
+ },
+ {
+ "epoch": 1.0764912280701755,
+ "grad_norm": 1.7368062734603882,
+ "learning_rate": 4.4517757671015826e-05,
+ "loss": 0.537,
+ "step": 7670
+ },
+ {
+ "epoch": 1.0778947368421052,
+ "grad_norm": 1.3677830696105957,
+ "learning_rate": 4.4503965777692456e-05,
+ "loss": 0.5131,
+ "step": 7680
+ },
+ {
+ "epoch": 1.079298245614035,
+ "grad_norm": 1.2926596403121948,
+ "learning_rate": 4.449015869986325e-05,
+ "loss": 0.4782,
+ "step": 7690
+ },
+ {
+ "epoch": 1.080701754385965,
+ "grad_norm": 2.191722869873047,
+ "learning_rate": 4.447633644827747e-05,
+ "loss": 0.4962,
+ "step": 7700
+ },
+ {
+ "epoch": 1.0821052631578947,
+ "grad_norm": 1.8317209482192993,
+ "learning_rate": 4.446249903369621e-05,
+ "loss": 0.5025,
+ "step": 7710
+ },
+ {
+ "epoch": 1.0835087719298246,
+ "grad_norm": 1.2881171703338623,
+ "learning_rate": 4.444864646689239e-05,
+ "loss": 0.4816,
+ "step": 7720
+ },
+ {
+ "epoch": 1.0849122807017544,
+ "grad_norm": 1.1918405294418335,
+ "learning_rate": 4.443477875865071e-05,
+ "loss": 0.4762,
+ "step": 7730
+ },
+ {
+ "epoch": 1.0863157894736841,
+ "grad_norm": 1.1728036403656006,
+ "learning_rate": 4.4420895919767626e-05,
+ "loss": 0.4501,
+ "step": 7740
+ },
+ {
+ "epoch": 1.087719298245614,
+ "grad_norm": 1.505370855331421,
+ "learning_rate": 4.440699796105143e-05,
+ "loss": 0.4855,
+ "step": 7750
+ },
+ {
+ "epoch": 1.0891228070175438,
+ "grad_norm": 1.6580755710601807,
+ "learning_rate": 4.439308489332215e-05,
+ "loss": 0.5558,
+ "step": 7760
+ },
+ {
+ "epoch": 1.0905263157894738,
+ "grad_norm": 1.496596097946167,
+ "learning_rate": 4.437915672741158e-05,
+ "loss": 0.5219,
+ "step": 7770
+ },
+ {
+ "epoch": 1.0919298245614035,
+ "grad_norm": 1.2828936576843262,
+ "learning_rate": 4.43652134741633e-05,
+ "loss": 0.4643,
+ "step": 7780
+ },
+ {
+ "epoch": 1.0933333333333333,
+ "grad_norm": 1.2443900108337402,
+ "learning_rate": 4.435125514443258e-05,
+ "loss": 0.562,
+ "step": 7790
+ },
+ {
+ "epoch": 1.0947368421052632,
+ "grad_norm": 1.5212448835372925,
+ "learning_rate": 4.4337281749086477e-05,
+ "loss": 0.5022,
+ "step": 7800
+ },
+ {
+ "epoch": 1.096140350877193,
+ "grad_norm": 2.2898428440093994,
+ "learning_rate": 4.432329329900375e-05,
+ "loss": 0.5462,
+ "step": 7810
+ },
+ {
+ "epoch": 1.0975438596491227,
+ "grad_norm": 1.3173396587371826,
+ "learning_rate": 4.4309289805074895e-05,
+ "loss": 0.5103,
+ "step": 7820
+ },
+ {
+ "epoch": 1.0989473684210527,
+ "grad_norm": 1.3435895442962646,
+ "learning_rate": 4.42952712782021e-05,
+ "loss": 0.5588,
+ "step": 7830
+ },
+ {
+ "epoch": 1.1003508771929824,
+ "grad_norm": 1.1860660314559937,
+ "learning_rate": 4.428123772929928e-05,
+ "loss": 0.5107,
+ "step": 7840
+ },
+ {
+ "epoch": 1.1017543859649124,
+ "grad_norm": 1.2616344690322876,
+ "learning_rate": 4.426718916929202e-05,
+ "loss": 0.463,
+ "step": 7850
+ },
+ {
+ "epoch": 1.1031578947368421,
+ "grad_norm": 0.8766297101974487,
+ "learning_rate": 4.425312560911762e-05,
+ "loss": 0.4801,
+ "step": 7860
+ },
+ {
+ "epoch": 1.1045614035087719,
+ "grad_norm": 1.4563919305801392,
+ "learning_rate": 4.4239047059725035e-05,
+ "loss": 0.4755,
+ "step": 7870
+ },
+ {
+ "epoch": 1.1059649122807018,
+ "grad_norm": 1.3378584384918213,
+ "learning_rate": 4.422495353207491e-05,
+ "loss": 0.4449,
+ "step": 7880
+ },
+ {
+ "epoch": 1.1073684210526316,
+ "grad_norm": 1.259637713432312,
+ "learning_rate": 4.4210845037139525e-05,
+ "loss": 0.4613,
+ "step": 7890
+ },
+ {
+ "epoch": 1.1087719298245613,
+ "grad_norm": 1.8832120895385742,
+ "learning_rate": 4.419672158590282e-05,
+ "loss": 0.6132,
+ "step": 7900
+ },
+ {
+ "epoch": 1.1101754385964913,
+ "grad_norm": 1.7765206098556519,
+ "learning_rate": 4.4182583189360415e-05,
+ "loss": 0.5235,
+ "step": 7910
+ },
+ {
+ "epoch": 1.111578947368421,
+ "grad_norm": 1.5252950191497803,
+ "learning_rate": 4.416842985851951e-05,
+ "loss": 0.5066,
+ "step": 7920
+ },
+ {
+ "epoch": 1.112982456140351,
+ "grad_norm": 1.145727515220642,
+ "learning_rate": 4.415426160439897e-05,
+ "loss": 0.5148,
+ "step": 7930
+ },
+ {
+ "epoch": 1.1143859649122807,
+ "grad_norm": 1.413393259048462,
+ "learning_rate": 4.414007843802927e-05,
+ "loss": 0.4731,
+ "step": 7940
+ },
+ {
+ "epoch": 1.1157894736842104,
+ "grad_norm": 1.227738618850708,
+ "learning_rate": 4.412588037045248e-05,
+ "loss": 0.4657,
+ "step": 7950
+ },
+ {
+ "epoch": 1.1171929824561404,
+ "grad_norm": 1.1237843036651611,
+ "learning_rate": 4.411166741272228e-05,
+ "loss": 0.5292,
+ "step": 7960
+ },
+ {
+ "epoch": 1.1185964912280701,
+ "grad_norm": 1.5785701274871826,
+ "learning_rate": 4.4097439575903964e-05,
+ "loss": 0.5086,
+ "step": 7970
+ },
+ {
+ "epoch": 1.12,
+ "grad_norm": 0.8202313780784607,
+ "learning_rate": 4.408319687107437e-05,
+ "loss": 0.4074,
+ "step": 7980
+ },
+ {
+ "epoch": 1.1214035087719298,
+ "grad_norm": 1.6983180046081543,
+ "learning_rate": 4.406893930932195e-05,
+ "loss": 0.5302,
+ "step": 7990
+ },
+ {
+ "epoch": 1.1228070175438596,
+ "grad_norm": 1.3259834051132202,
+ "learning_rate": 4.4054666901746685e-05,
+ "loss": 0.5533,
+ "step": 8000
+ },
+ {
+ "epoch": 1.1228070175438596,
+ "eval_loss": 0.6547604203224182,
+ "eval_runtime": 43.9015,
+ "eval_samples_per_second": 34.167,
+ "eval_steps_per_second": 8.542,
+ "step": 8000
+ },
+ {
+ "epoch": 1.1242105263157895,
+ "grad_norm": 1.5617778301239014,
+ "learning_rate": 4.404037965946015e-05,
+ "loss": 0.5533,
+ "step": 8010
+ },
+ {
+ "epoch": 1.1256140350877193,
+ "grad_norm": 1.3925784826278687,
+ "learning_rate": 4.402607759358545e-05,
+ "loss": 0.573,
+ "step": 8020
+ },
+ {
+ "epoch": 1.127017543859649,
+ "grad_norm": 2.007066011428833,
+ "learning_rate": 4.401176071525722e-05,
+ "loss": 0.6002,
+ "step": 8030
+ },
+ {
+ "epoch": 1.128421052631579,
+ "grad_norm": 1.2479066848754883,
+ "learning_rate": 4.399742903562166e-05,
+ "loss": 0.5412,
+ "step": 8040
+ },
+ {
+ "epoch": 1.1298245614035087,
+ "grad_norm": 2.116882562637329,
+ "learning_rate": 4.3983082565836454e-05,
+ "loss": 0.5516,
+ "step": 8050
+ },
+ {
+ "epoch": 1.1312280701754387,
+ "grad_norm": 1.0991559028625488,
+ "learning_rate": 4.3968721317070835e-05,
+ "loss": 0.5142,
+ "step": 8060
+ },
+ {
+ "epoch": 1.1326315789473684,
+ "grad_norm": 1.5136295557022095,
+ "learning_rate": 4.395434530050553e-05,
+ "loss": 0.4974,
+ "step": 8070
+ },
+ {
+ "epoch": 1.1340350877192982,
+ "grad_norm": 1.6304662227630615,
+ "learning_rate": 4.393995452733274e-05,
+ "loss": 0.5921,
+ "step": 8080
+ },
+ {
+ "epoch": 1.1354385964912281,
+ "grad_norm": 1.1499663591384888,
+ "learning_rate": 4.392554900875619e-05,
+ "loss": 0.5516,
+ "step": 8090
+ },
+ {
+ "epoch": 1.1368421052631579,
+ "grad_norm": 1.14556884765625,
+ "learning_rate": 4.3911128755991085e-05,
+ "loss": 0.4377,
+ "step": 8100
+ },
+ {
+ "epoch": 1.1382456140350876,
+ "grad_norm": 2.168900728225708,
+ "learning_rate": 4.3896693780264054e-05,
+ "loss": 0.5489,
+ "step": 8110
+ },
+ {
+ "epoch": 1.1396491228070176,
+ "grad_norm": 1.8360158205032349,
+ "learning_rate": 4.388224409281324e-05,
+ "loss": 0.4883,
+ "step": 8120
+ },
+ {
+ "epoch": 1.1410526315789473,
+ "grad_norm": 1.3180638551712036,
+ "learning_rate": 4.3867779704888225e-05,
+ "loss": 0.5316,
+ "step": 8130
+ },
+ {
+ "epoch": 1.1424561403508773,
+ "grad_norm": 1.194568157196045,
+ "learning_rate": 4.385330062775001e-05,
+ "loss": 0.5961,
+ "step": 8140
+ },
+ {
+ "epoch": 1.143859649122807,
+ "grad_norm": 1.7998569011688232,
+ "learning_rate": 4.383880687267107e-05,
+ "loss": 0.5839,
+ "step": 8150
+ },
+ {
+ "epoch": 1.1452631578947368,
+ "grad_norm": 1.313109040260315,
+ "learning_rate": 4.3824298450935284e-05,
+ "loss": 0.4834,
+ "step": 8160
+ },
+ {
+ "epoch": 1.1466666666666667,
+ "grad_norm": 1.082961916923523,
+ "learning_rate": 4.380977537383796e-05,
+ "loss": 0.5543,
+ "step": 8170
+ },
+ {
+ "epoch": 1.1480701754385965,
+ "grad_norm": 1.8646924495697021,
+ "learning_rate": 4.37952376526858e-05,
+ "loss": 0.5548,
+ "step": 8180
+ },
+ {
+ "epoch": 1.1494736842105264,
+ "grad_norm": 0.8285521268844604,
+ "learning_rate": 4.378068529879693e-05,
+ "loss": 0.4596,
+ "step": 8190
+ },
+ {
+ "epoch": 1.1508771929824562,
+ "grad_norm": 1.6364754438400269,
+ "learning_rate": 4.376611832350085e-05,
+ "loss": 0.5165,
+ "step": 8200
+ },
+ {
+ "epoch": 1.152280701754386,
+ "grad_norm": 1.4383785724639893,
+ "learning_rate": 4.3751536738138454e-05,
+ "loss": 0.5085,
+ "step": 8210
+ },
+ {
+ "epoch": 1.1536842105263159,
+ "grad_norm": 1.0523866415023804,
+ "learning_rate": 4.3736940554062e-05,
+ "loss": 0.5485,
+ "step": 8220
+ },
+ {
+ "epoch": 1.1550877192982456,
+ "grad_norm": 2.191441535949707,
+ "learning_rate": 4.372232978263513e-05,
+ "loss": 0.5049,
+ "step": 8230
+ },
+ {
+ "epoch": 1.1564912280701753,
+ "grad_norm": 1.923846960067749,
+ "learning_rate": 4.3707704435232816e-05,
+ "loss": 0.4833,
+ "step": 8240
+ },
+ {
+ "epoch": 1.1578947368421053,
+ "grad_norm": 1.855549693107605,
+ "learning_rate": 4.36930645232414e-05,
+ "loss": 0.5616,
+ "step": 8250
+ },
+ {
+ "epoch": 1.159298245614035,
+ "grad_norm": 1.620718240737915,
+ "learning_rate": 4.367841005805855e-05,
+ "loss": 0.5448,
+ "step": 8260
+ },
+ {
+ "epoch": 1.1607017543859648,
+ "grad_norm": 1.5233041048049927,
+ "learning_rate": 4.366374105109327e-05,
+ "loss": 0.4714,
+ "step": 8270
+ },
+ {
+ "epoch": 1.1621052631578948,
+ "grad_norm": 1.2673170566558838,
+ "learning_rate": 4.364905751376589e-05,
+ "loss": 0.5994,
+ "step": 8280
+ },
+ {
+ "epoch": 1.1635087719298245,
+ "grad_norm": 1.4488414525985718,
+ "learning_rate": 4.3634359457508046e-05,
+ "loss": 0.5633,
+ "step": 8290
+ },
+ {
+ "epoch": 1.1649122807017545,
+ "grad_norm": 2.300537586212158,
+ "learning_rate": 4.3619646893762675e-05,
+ "loss": 0.5566,
+ "step": 8300
+ },
+ {
+ "epoch": 1.1663157894736842,
+ "grad_norm": 0.8948672413825989,
+ "learning_rate": 4.360491983398402e-05,
+ "loss": 0.4631,
+ "step": 8310
+ },
+ {
+ "epoch": 1.167719298245614,
+ "grad_norm": 1.453062653541565,
+ "learning_rate": 4.3590178289637585e-05,
+ "loss": 0.4525,
+ "step": 8320
+ },
+ {
+ "epoch": 1.169122807017544,
+ "grad_norm": 1.201952576637268,
+ "learning_rate": 4.357542227220019e-05,
+ "loss": 0.4501,
+ "step": 8330
+ },
+ {
+ "epoch": 1.1705263157894736,
+ "grad_norm": 1.3226593732833862,
+ "learning_rate": 4.356065179315988e-05,
+ "loss": 0.561,
+ "step": 8340
+ },
+ {
+ "epoch": 1.1719298245614036,
+ "grad_norm": 0.9465067386627197,
+ "learning_rate": 4.354586686401599e-05,
+ "loss": 0.513,
+ "step": 8350
+ },
+ {
+ "epoch": 1.1733333333333333,
+ "grad_norm": 1.12758207321167,
+ "learning_rate": 4.353106749627909e-05,
+ "loss": 0.4915,
+ "step": 8360
+ },
+ {
+ "epoch": 1.174736842105263,
+ "grad_norm": 2.4800631999969482,
+ "learning_rate": 4.3516253701471e-05,
+ "loss": 0.5193,
+ "step": 8370
+ },
+ {
+ "epoch": 1.176140350877193,
+ "grad_norm": 0.8092995285987854,
+ "learning_rate": 4.350142549112476e-05,
+ "loss": 0.5881,
+ "step": 8380
+ },
+ {
+ "epoch": 1.1775438596491228,
+ "grad_norm": 1.0919562578201294,
+ "learning_rate": 4.348658287678465e-05,
+ "loss": 0.5378,
+ "step": 8390
+ },
+ {
+ "epoch": 1.1789473684210527,
+ "grad_norm": 1.3849503993988037,
+ "learning_rate": 4.347172587000614e-05,
+ "loss": 0.5356,
+ "step": 8400
+ },
+ {
+ "epoch": 1.1803508771929825,
+ "grad_norm": 2.6931231021881104,
+ "learning_rate": 4.345685448235594e-05,
+ "loss": 0.5849,
+ "step": 8410
+ },
+ {
+ "epoch": 1.1817543859649122,
+ "grad_norm": 1.188615083694458,
+ "learning_rate": 4.3441968725411905e-05,
+ "loss": 0.5157,
+ "step": 8420
+ },
+ {
+ "epoch": 1.1831578947368422,
+ "grad_norm": 2.472364902496338,
+ "learning_rate": 4.342706861076313e-05,
+ "loss": 0.6508,
+ "step": 8430
+ },
+ {
+ "epoch": 1.184561403508772,
+ "grad_norm": 1.9964373111724854,
+ "learning_rate": 4.341215415000987e-05,
+ "loss": 0.525,
+ "step": 8440
+ },
+ {
+ "epoch": 1.1859649122807017,
+ "grad_norm": 1.2414706945419312,
+ "learning_rate": 4.339722535476353e-05,
+ "loss": 0.6218,
+ "step": 8450
+ },
+ {
+ "epoch": 1.1873684210526316,
+ "grad_norm": 1.69329035282135,
+ "learning_rate": 4.3382282236646684e-05,
+ "loss": 0.5375,
+ "step": 8460
+ },
+ {
+ "epoch": 1.1887719298245614,
+ "grad_norm": 0.9698866605758667,
+ "learning_rate": 4.336732480729306e-05,
+ "loss": 0.5454,
+ "step": 8470
+ },
+ {
+ "epoch": 1.190175438596491,
+ "grad_norm": 1.450108528137207,
+ "learning_rate": 4.335235307834755e-05,
+ "loss": 0.5507,
+ "step": 8480
+ },
+ {
+ "epoch": 1.191578947368421,
+ "grad_norm": 1.593243956565857,
+ "learning_rate": 4.333736706146615e-05,
+ "loss": 0.5172,
+ "step": 8490
+ },
+ {
+ "epoch": 1.1929824561403508,
+ "grad_norm": 0.9896023869514465,
+ "learning_rate": 4.332236676831598e-05,
+ "loss": 0.5401,
+ "step": 8500
+ },
+ {
+ "epoch": 1.1943859649122808,
+ "grad_norm": 1.9976292848587036,
+ "learning_rate": 4.330735221057529e-05,
+ "loss": 0.4734,
+ "step": 8510
+ },
+ {
+ "epoch": 1.1957894736842105,
+ "grad_norm": 0.9883520007133484,
+ "learning_rate": 4.329232339993342e-05,
+ "loss": 0.4783,
+ "step": 8520
+ },
+ {
+ "epoch": 1.1971929824561403,
+ "grad_norm": 1.6024073362350464,
+ "learning_rate": 4.327728034809082e-05,
+ "loss": 0.5843,
+ "step": 8530
+ },
+ {
+ "epoch": 1.1985964912280702,
+ "grad_norm": 1.5246341228485107,
+ "learning_rate": 4.326222306675902e-05,
+ "loss": 0.4922,
+ "step": 8540
+ },
+ {
+ "epoch": 1.2,
+ "grad_norm": 1.8065810203552246,
+ "learning_rate": 4.324715156766064e-05,
+ "loss": 0.6196,
+ "step": 8550
+ },
+ {
+ "epoch": 1.20140350877193,
+ "grad_norm": 1.25635826587677,
+ "learning_rate": 4.3232065862529334e-05,
+ "loss": 0.4713,
+ "step": 8560
+ },
+ {
+ "epoch": 1.2028070175438597,
+ "grad_norm": 1.874711036682129,
+ "learning_rate": 4.321696596310987e-05,
+ "loss": 0.5015,
+ "step": 8570
+ },
+ {
+ "epoch": 1.2042105263157894,
+ "grad_norm": 1.4795438051223755,
+ "learning_rate": 4.3201851881158004e-05,
+ "loss": 0.569,
+ "step": 8580
+ },
+ {
+ "epoch": 1.2056140350877194,
+ "grad_norm": 1.1996725797653198,
+ "learning_rate": 4.31867236284406e-05,
+ "loss": 0.5079,
+ "step": 8590
+ },
+ {
+ "epoch": 1.207017543859649,
+ "grad_norm": 1.1284021139144897,
+ "learning_rate": 4.31715812167355e-05,
+ "loss": 0.5132,
+ "step": 8600
+ },
+ {
+ "epoch": 1.208421052631579,
+ "grad_norm": 1.3568930625915527,
+ "learning_rate": 4.3156424657831596e-05,
+ "loss": 0.5907,
+ "step": 8610
+ },
+ {
+ "epoch": 1.2098245614035088,
+ "grad_norm": 2.9363083839416504,
+ "learning_rate": 4.3141253963528795e-05,
+ "loss": 0.6086,
+ "step": 8620
+ },
+ {
+ "epoch": 1.2112280701754385,
+ "grad_norm": 1.58176589012146,
+ "learning_rate": 4.3126069145637987e-05,
+ "loss": 0.4966,
+ "step": 8630
+ },
+ {
+ "epoch": 1.2126315789473685,
+ "grad_norm": 1.1019052267074585,
+ "learning_rate": 4.3110870215981095e-05,
+ "loss": 0.5713,
+ "step": 8640
+ },
+ {
+ "epoch": 1.2140350877192982,
+ "grad_norm": 1.8327674865722656,
+ "learning_rate": 4.309565718639098e-05,
+ "loss": 0.5538,
+ "step": 8650
+ },
+ {
+ "epoch": 1.215438596491228,
+ "grad_norm": 1.4098116159439087,
+ "learning_rate": 4.308043006871153e-05,
+ "loss": 0.5065,
+ "step": 8660
+ },
+ {
+ "epoch": 1.216842105263158,
+ "grad_norm": 1.7343579530715942,
+ "learning_rate": 4.306518887479758e-05,
+ "loss": 0.495,
+ "step": 8670
+ },
+ {
+ "epoch": 1.2182456140350877,
+ "grad_norm": 1.1002309322357178,
+ "learning_rate": 4.3049933616514895e-05,
+ "loss": 0.5217,
+ "step": 8680
+ },
+ {
+ "epoch": 1.2196491228070174,
+ "grad_norm": 1.6965640783309937,
+ "learning_rate": 4.303466430574024e-05,
+ "loss": 0.5196,
+ "step": 8690
+ },
+ {
+ "epoch": 1.2210526315789474,
+ "grad_norm": 2.227039337158203,
+ "learning_rate": 4.301938095436129e-05,
+ "loss": 0.4687,
+ "step": 8700
+ },
+ {
+ "epoch": 1.2224561403508771,
+ "grad_norm": 1.9886293411254883,
+ "learning_rate": 4.300408357427666e-05,
+ "loss": 0.6043,
+ "step": 8710
+ },
+ {
+ "epoch": 1.223859649122807,
+ "grad_norm": 1.9546360969543457,
+ "learning_rate": 4.298877217739587e-05,
+ "loss": 0.5359,
+ "step": 8720
+ },
+ {
+ "epoch": 1.2252631578947368,
+ "grad_norm": 1.1686962842941284,
+ "learning_rate": 4.29734467756394e-05,
+ "loss": 0.4502,
+ "step": 8730
+ },
+ {
+ "epoch": 1.2266666666666666,
+ "grad_norm": 1.1630245447158813,
+ "learning_rate": 4.2958107380938564e-05,
+ "loss": 0.4823,
+ "step": 8740
+ },
+ {
+ "epoch": 1.2280701754385965,
+ "grad_norm": 1.3308658599853516,
+ "learning_rate": 4.294275400523564e-05,
+ "loss": 0.4295,
+ "step": 8750
+ },
+ {
+ "epoch": 1.2294736842105263,
+ "grad_norm": 1.694773554801941,
+ "learning_rate": 4.2927386660483726e-05,
+ "loss": 0.4983,
+ "step": 8760
+ },
+ {
+ "epoch": 1.2308771929824562,
+ "grad_norm": 0.8299292325973511,
+ "learning_rate": 4.291200535864684e-05,
+ "loss": 0.5405,
+ "step": 8770
+ },
+ {
+ "epoch": 1.232280701754386,
+ "grad_norm": 1.7378555536270142,
+ "learning_rate": 4.289661011169986e-05,
+ "loss": 0.5094,
+ "step": 8780
+ },
+ {
+ "epoch": 1.2336842105263157,
+ "grad_norm": 1.8789598941802979,
+ "learning_rate": 4.28812009316285e-05,
+ "loss": 0.5622,
+ "step": 8790
+ },
+ {
+ "epoch": 1.2350877192982457,
+ "grad_norm": 1.2347322702407837,
+ "learning_rate": 4.286577783042934e-05,
+ "loss": 0.4577,
+ "step": 8800
+ },
+ {
+ "epoch": 1.2364912280701754,
+ "grad_norm": 1.610954761505127,
+ "learning_rate": 4.285034082010981e-05,
+ "loss": 0.6015,
+ "step": 8810
+ },
+ {
+ "epoch": 1.2378947368421054,
+ "grad_norm": 0.8974846005439758,
+ "learning_rate": 4.2834889912688126e-05,
+ "loss": 0.5716,
+ "step": 8820
+ },
+ {
+ "epoch": 1.2392982456140351,
+ "grad_norm": 1.1207072734832764,
+ "learning_rate": 4.281942512019336e-05,
+ "loss": 0.5634,
+ "step": 8830
+ },
+ {
+ "epoch": 1.2407017543859649,
+ "grad_norm": 2.1318647861480713,
+ "learning_rate": 4.2803946454665376e-05,
+ "loss": 0.4982,
+ "step": 8840
+ },
+ {
+ "epoch": 1.2421052631578948,
+ "grad_norm": 1.3747590780258179,
+ "learning_rate": 4.2788453928154855e-05,
+ "loss": 0.5006,
+ "step": 8850
+ },
+ {
+ "epoch": 1.2435087719298246,
+ "grad_norm": 0.8502065539360046,
+ "learning_rate": 4.2772947552723266e-05,
+ "loss": 0.4901,
+ "step": 8860
+ },
+ {
+ "epoch": 1.2449122807017543,
+ "grad_norm": 1.888156771659851,
+ "learning_rate": 4.275742734044283e-05,
+ "loss": 0.4847,
+ "step": 8870
+ },
+ {
+ "epoch": 1.2463157894736843,
+ "grad_norm": 2.0071113109588623,
+ "learning_rate": 4.274189330339658e-05,
+ "loss": 0.5224,
+ "step": 8880
+ },
+ {
+ "epoch": 1.247719298245614,
+ "grad_norm": 1.0914371013641357,
+ "learning_rate": 4.272634545367831e-05,
+ "loss": 0.4698,
+ "step": 8890
+ },
+ {
+ "epoch": 1.2491228070175437,
+ "grad_norm": 1.4466750621795654,
+ "learning_rate": 4.271078380339252e-05,
+ "loss": 0.5801,
+ "step": 8900
+ },
+ {
+ "epoch": 1.2505263157894737,
+ "grad_norm": 1.5080820322036743,
+ "learning_rate": 4.269520836465452e-05,
+ "loss": 0.6584,
+ "step": 8910
+ },
+ {
+ "epoch": 1.2519298245614034,
+ "grad_norm": 1.510321855545044,
+ "learning_rate": 4.2679619149590304e-05,
+ "loss": 0.5752,
+ "step": 8920
+ },
+ {
+ "epoch": 1.2533333333333334,
+ "grad_norm": 1.527969479560852,
+ "learning_rate": 4.266401617033662e-05,
+ "loss": 0.4829,
+ "step": 8930
+ },
+ {
+ "epoch": 1.2547368421052632,
+ "grad_norm": 1.2593231201171875,
+ "learning_rate": 4.264839943904091e-05,
+ "loss": 0.5411,
+ "step": 8940
+ },
+ {
+ "epoch": 1.256140350877193,
+ "grad_norm": 1.490929365158081,
+ "learning_rate": 4.2632768967861345e-05,
+ "loss": 0.5089,
+ "step": 8950
+ },
+ {
+ "epoch": 1.2575438596491229,
+ "grad_norm": 0.7822336554527283,
+ "learning_rate": 4.261712476896679e-05,
+ "loss": 0.6257,
+ "step": 8960
+ },
+ {
+ "epoch": 1.2589473684210526,
+ "grad_norm": 1.331175446510315,
+ "learning_rate": 4.2601466854536774e-05,
+ "loss": 0.5403,
+ "step": 8970
+ },
+ {
+ "epoch": 1.2603508771929826,
+ "grad_norm": 1.4372813701629639,
+ "learning_rate": 4.2585795236761526e-05,
+ "loss": 0.5305,
+ "step": 8980
+ },
+ {
+ "epoch": 1.2617543859649123,
+ "grad_norm": 1.873630166053772,
+ "learning_rate": 4.257010992784194e-05,
+ "loss": 0.5776,
+ "step": 8990
+ },
+ {
+ "epoch": 1.263157894736842,
+ "grad_norm": 1.2255460023880005,
+ "learning_rate": 4.255441093998956e-05,
+ "loss": 0.5772,
+ "step": 9000
+ },
+ {
+ "epoch": 1.264561403508772,
+ "grad_norm": 1.3667577505111694,
+ "learning_rate": 4.253869828542659e-05,
+ "loss": 0.589,
+ "step": 9010
+ },
+ {
+ "epoch": 1.2659649122807017,
+ "grad_norm": 1.189122200012207,
+ "learning_rate": 4.2522971976385876e-05,
+ "loss": 0.5012,
+ "step": 9020
+ },
+ {
+ "epoch": 1.2673684210526317,
+ "grad_norm": 2.0150930881500244,
+ "learning_rate": 4.250723202511089e-05,
+ "loss": 0.4813,
+ "step": 9030
+ },
+ {
+ "epoch": 1.2687719298245614,
+ "grad_norm": 1.83956778049469,
+ "learning_rate": 4.2491478443855704e-05,
+ "loss": 0.513,
+ "step": 9040
+ },
+ {
+ "epoch": 1.2701754385964912,
+ "grad_norm": 1.8281301259994507,
+ "learning_rate": 4.247571124488504e-05,
+ "loss": 0.6229,
+ "step": 9050
+ },
+ {
+ "epoch": 1.271578947368421,
+ "grad_norm": 1.5498483180999756,
+ "learning_rate": 4.2459930440474194e-05,
+ "loss": 0.5493,
+ "step": 9060
+ },
+ {
+ "epoch": 1.2729824561403509,
+ "grad_norm": 1.680643081665039,
+ "learning_rate": 4.2444136042909064e-05,
+ "loss": 0.4845,
+ "step": 9070
+ },
+ {
+ "epoch": 1.2743859649122806,
+ "grad_norm": 1.4468814134597778,
+ "learning_rate": 4.2428328064486134e-05,
+ "loss": 0.5174,
+ "step": 9080
+ },
+ {
+ "epoch": 1.2757894736842106,
+ "grad_norm": 1.8637295961380005,
+ "learning_rate": 4.2412506517512456e-05,
+ "loss": 0.501,
+ "step": 9090
+ },
+ {
+ "epoch": 1.2771929824561403,
+ "grad_norm": 1.8078296184539795,
+ "learning_rate": 4.239667141430564e-05,
+ "loss": 0.6422,
+ "step": 9100
+ },
+ {
+ "epoch": 1.27859649122807,
+ "grad_norm": 1.8999830484390259,
+ "learning_rate": 4.238082276719387e-05,
+ "loss": 0.5323,
+ "step": 9110
+ },
+ {
+ "epoch": 1.28,
+ "grad_norm": 0.8832138776779175,
+ "learning_rate": 4.236496058851585e-05,
+ "loss": 0.4542,
+ "step": 9120
+ },
+ {
+ "epoch": 1.2814035087719298,
+ "grad_norm": 1.2980352640151978,
+ "learning_rate": 4.234908489062083e-05,
+ "loss": 0.5697,
+ "step": 9130
+ },
+ {
+ "epoch": 1.2828070175438597,
+ "grad_norm": 1.667039394378662,
+ "learning_rate": 4.233319568586859e-05,
+ "loss": 0.5108,
+ "step": 9140
+ },
+ {
+ "epoch": 1.2842105263157895,
+ "grad_norm": 1.6664785146713257,
+ "learning_rate": 4.231729298662942e-05,
+ "loss": 0.4472,
+ "step": 9150
+ },
+ {
+ "epoch": 1.2856140350877192,
+ "grad_norm": 1.4384082555770874,
+ "learning_rate": 4.230137680528411e-05,
+ "loss": 0.62,
+ "step": 9160
+ },
+ {
+ "epoch": 1.2870175438596492,
+ "grad_norm": 1.851901888847351,
+ "learning_rate": 4.228544715422395e-05,
+ "loss": 0.5226,
+ "step": 9170
+ },
+ {
+ "epoch": 1.288421052631579,
+ "grad_norm": 1.372755527496338,
+ "learning_rate": 4.2269504045850744e-05,
+ "loss": 0.5492,
+ "step": 9180
+ },
+ {
+ "epoch": 1.2898245614035089,
+ "grad_norm": 1.7806882858276367,
+ "learning_rate": 4.225354749257673e-05,
+ "loss": 0.5359,
+ "step": 9190
+ },
+ {
+ "epoch": 1.2912280701754386,
+ "grad_norm": 1.4967597723007202,
+ "learning_rate": 4.2237577506824624e-05,
+ "loss": 0.535,
+ "step": 9200
+ },
+ {
+ "epoch": 1.2926315789473684,
+ "grad_norm": 1.220828890800476,
+ "learning_rate": 4.222159410102761e-05,
+ "loss": 0.4581,
+ "step": 9210
+ },
+ {
+ "epoch": 1.2940350877192983,
+ "grad_norm": 1.9541898965835571,
+ "learning_rate": 4.220559728762933e-05,
+ "loss": 0.5109,
+ "step": 9220
+ },
+ {
+ "epoch": 1.295438596491228,
+ "grad_norm": 0.9027903079986572,
+ "learning_rate": 4.2189587079083846e-05,
+ "loss": 0.4501,
+ "step": 9230
+ },
+ {
+ "epoch": 1.296842105263158,
+ "grad_norm": 2.4572014808654785,
+ "learning_rate": 4.217356348785565e-05,
+ "loss": 0.5574,
+ "step": 9240
+ },
+ {
+ "epoch": 1.2982456140350878,
+ "grad_norm": 1.5705862045288086,
+ "learning_rate": 4.215752652641967e-05,
+ "loss": 0.5558,
+ "step": 9250
+ },
+ {
+ "epoch": 1.2996491228070175,
+ "grad_norm": 1.5693955421447754,
+ "learning_rate": 4.21414762072612e-05,
+ "loss": 0.4734,
+ "step": 9260
+ },
+ {
+ "epoch": 1.3010526315789472,
+ "grad_norm": 1.4699418544769287,
+ "learning_rate": 4.2125412542876e-05,
+ "loss": 0.574,
+ "step": 9270
+ },
+ {
+ "epoch": 1.3024561403508772,
+ "grad_norm": 1.8956423997879028,
+ "learning_rate": 4.210933554577016e-05,
+ "loss": 0.4505,
+ "step": 9280
+ },
+ {
+ "epoch": 1.303859649122807,
+ "grad_norm": 1.8722734451293945,
+ "learning_rate": 4.209324522846018e-05,
+ "loss": 0.5021,
+ "step": 9290
+ },
+ {
+ "epoch": 1.305263157894737,
+ "grad_norm": 1.3624267578125,
+ "learning_rate": 4.207714160347292e-05,
+ "loss": 0.4925,
+ "step": 9300
+ },
+ {
+ "epoch": 1.3066666666666666,
+ "grad_norm": 2.2316009998321533,
+ "learning_rate": 4.206102468334561e-05,
+ "loss": 0.6289,
+ "step": 9310
+ },
+ {
+ "epoch": 1.3080701754385964,
+ "grad_norm": 2.3992788791656494,
+ "learning_rate": 4.2044894480625825e-05,
+ "loss": 0.5387,
+ "step": 9320
+ },
+ {
+ "epoch": 1.3094736842105263,
+ "grad_norm": 1.9581996202468872,
+ "learning_rate": 4.202875100787147e-05,
+ "loss": 0.5788,
+ "step": 9330
+ },
+ {
+ "epoch": 1.310877192982456,
+ "grad_norm": 1.5940258502960205,
+ "learning_rate": 4.201259427765081e-05,
+ "loss": 0.5313,
+ "step": 9340
+ },
+ {
+ "epoch": 1.312280701754386,
+ "grad_norm": 1.2207392454147339,
+ "learning_rate": 4.1996424302542404e-05,
+ "loss": 0.5948,
+ "step": 9350
+ },
+ {
+ "epoch": 1.3136842105263158,
+ "grad_norm": 1.743915319442749,
+ "learning_rate": 4.198024109513512e-05,
+ "loss": 0.4913,
+ "step": 9360
+ },
+ {
+ "epoch": 1.3150877192982455,
+ "grad_norm": 1.9989562034606934,
+ "learning_rate": 4.196404466802816e-05,
+ "loss": 0.4895,
+ "step": 9370
+ },
+ {
+ "epoch": 1.3164912280701755,
+ "grad_norm": 1.8793307542800903,
+ "learning_rate": 4.194783503383098e-05,
+ "loss": 0.5537,
+ "step": 9380
+ },
+ {
+ "epoch": 1.3178947368421052,
+ "grad_norm": 1.9246269464492798,
+ "learning_rate": 4.193161220516334e-05,
+ "loss": 0.5641,
+ "step": 9390
+ },
+ {
+ "epoch": 1.3192982456140352,
+ "grad_norm": 1.5612519979476929,
+ "learning_rate": 4.191537619465529e-05,
+ "loss": 0.464,
+ "step": 9400
+ },
+ {
+ "epoch": 1.320701754385965,
+ "grad_norm": 0.9451802968978882,
+ "learning_rate": 4.189912701494709e-05,
+ "loss": 0.4657,
+ "step": 9410
+ },
+ {
+ "epoch": 1.3221052631578947,
+ "grad_norm": 1.790861964225769,
+ "learning_rate": 4.1882864678689296e-05,
+ "loss": 0.5113,
+ "step": 9420
+ },
+ {
+ "epoch": 1.3235087719298246,
+ "grad_norm": 1.9305384159088135,
+ "learning_rate": 4.186658919854269e-05,
+ "loss": 0.5593,
+ "step": 9430
+ },
+ {
+ "epoch": 1.3249122807017544,
+ "grad_norm": 2.051849603652954,
+ "learning_rate": 4.1850300587178304e-05,
+ "loss": 0.4578,
+ "step": 9440
+ },
+ {
+ "epoch": 1.3263157894736843,
+ "grad_norm": 1.7359352111816406,
+ "learning_rate": 4.183399885727737e-05,
+ "loss": 0.5637,
+ "step": 9450
+ },
+ {
+ "epoch": 1.327719298245614,
+ "grad_norm": 1.373119592666626,
+ "learning_rate": 4.181768402153135e-05,
+ "loss": 0.5491,
+ "step": 9460
+ },
+ {
+ "epoch": 1.3291228070175438,
+ "grad_norm": 1.0692249536514282,
+ "learning_rate": 4.1801356092641886e-05,
+ "loss": 0.5558,
+ "step": 9470
+ },
+ {
+ "epoch": 1.3305263157894736,
+ "grad_norm": 1.0997167825698853,
+ "learning_rate": 4.178501508332085e-05,
+ "loss": 0.4543,
+ "step": 9480
+ },
+ {
+ "epoch": 1.3319298245614035,
+ "grad_norm": 1.5097479820251465,
+ "learning_rate": 4.176866100629027e-05,
+ "loss": 0.5832,
+ "step": 9490
+ },
+ {
+ "epoch": 1.3333333333333333,
+ "grad_norm": 1.5708390474319458,
+ "learning_rate": 4.175229387428235e-05,
+ "loss": 0.5378,
+ "step": 9500
+ },
+ {
+ "epoch": 1.3347368421052632,
+ "grad_norm": 1.6936376094818115,
+ "learning_rate": 4.1735913700039477e-05,
+ "loss": 0.5046,
+ "step": 9510
+ },
+ {
+ "epoch": 1.336140350877193,
+ "grad_norm": 0.9409717321395874,
+ "learning_rate": 4.171952049631416e-05,
+ "loss": 0.5171,
+ "step": 9520
+ },
+ {
+ "epoch": 1.3375438596491227,
+ "grad_norm": 1.803077220916748,
+ "learning_rate": 4.170311427586908e-05,
+ "loss": 0.5939,
+ "step": 9530
+ },
+ {
+ "epoch": 1.3389473684210527,
+ "grad_norm": 1.1349605321884155,
+ "learning_rate": 4.168669505147705e-05,
+ "loss": 0.5768,
+ "step": 9540
+ },
+ {
+ "epoch": 1.3403508771929824,
+ "grad_norm": 1.6885027885437012,
+ "learning_rate": 4.1670262835920996e-05,
+ "loss": 0.5029,
+ "step": 9550
+ },
+ {
+ "epoch": 1.3417543859649124,
+ "grad_norm": 1.278064489364624,
+ "learning_rate": 4.1653817641993936e-05,
+ "loss": 0.4611,
+ "step": 9560
+ },
+ {
+ "epoch": 1.343157894736842,
+ "grad_norm": 1.7429572343826294,
+ "learning_rate": 4.163735948249905e-05,
+ "loss": 0.5701,
+ "step": 9570
+ },
+ {
+ "epoch": 1.3445614035087718,
+ "grad_norm": 2.2477900981903076,
+ "learning_rate": 4.162088837024956e-05,
+ "loss": 0.5356,
+ "step": 9580
+ },
+ {
+ "epoch": 1.3459649122807018,
+ "grad_norm": 1.617583990097046,
+ "learning_rate": 4.16044043180688e-05,
+ "loss": 0.4985,
+ "step": 9590
+ },
+ {
+ "epoch": 1.3473684210526315,
+ "grad_norm": 1.5791269540786743,
+ "learning_rate": 4.158790733879017e-05,
+ "loss": 0.5036,
+ "step": 9600
+ },
+ {
+ "epoch": 1.3487719298245615,
+ "grad_norm": 1.9323596954345703,
+ "learning_rate": 4.1571397445257124e-05,
+ "loss": 0.5212,
+ "step": 9610
+ },
+ {
+ "epoch": 1.3501754385964913,
+ "grad_norm": 1.3054085969924927,
+ "learning_rate": 4.155487465032319e-05,
+ "loss": 0.5225,
+ "step": 9620
+ },
+ {
+ "epoch": 1.351578947368421,
+ "grad_norm": 1.5751895904541016,
+ "learning_rate": 4.153833896685193e-05,
+ "loss": 0.4985,
+ "step": 9630
+ },
+ {
+ "epoch": 1.352982456140351,
+ "grad_norm": 1.8643230199813843,
+ "learning_rate": 4.1521790407716936e-05,
+ "loss": 0.5386,
+ "step": 9640
+ },
+ {
+ "epoch": 1.3543859649122807,
+ "grad_norm": 1.0118595361709595,
+ "learning_rate": 4.150522898580183e-05,
+ "loss": 0.5283,
+ "step": 9650
+ },
+ {
+ "epoch": 1.3557894736842107,
+ "grad_norm": 1.9065098762512207,
+ "learning_rate": 4.148865471400024e-05,
+ "loss": 0.5684,
+ "step": 9660
+ },
+ {
+ "epoch": 1.3571929824561404,
+ "grad_norm": 2.0933990478515625,
+ "learning_rate": 4.147206760521582e-05,
+ "loss": 0.525,
+ "step": 9670
+ },
+ {
+ "epoch": 1.3585964912280701,
+ "grad_norm": 2.0099165439605713,
+ "learning_rate": 4.145546767236219e-05,
+ "loss": 0.5258,
+ "step": 9680
+ },
+ {
+ "epoch": 1.3599999999999999,
+ "grad_norm": 2.2704153060913086,
+ "learning_rate": 4.143885492836297e-05,
+ "loss": 0.5159,
+ "step": 9690
+ },
+ {
+ "epoch": 1.3614035087719298,
+ "grad_norm": 1.3344398736953735,
+ "learning_rate": 4.1422229386151754e-05,
+ "loss": 0.5656,
+ "step": 9700
+ },
+ {
+ "epoch": 1.3628070175438596,
+ "grad_norm": 2.356660842895508,
+ "learning_rate": 4.140559105867209e-05,
+ "loss": 0.488,
+ "step": 9710
+ },
+ {
+ "epoch": 1.3642105263157895,
+ "grad_norm": 1.0358322858810425,
+ "learning_rate": 4.1388939958877495e-05,
+ "loss": 0.457,
+ "step": 9720
+ },
+ {
+ "epoch": 1.3656140350877193,
+ "grad_norm": 1.4958525896072388,
+ "learning_rate": 4.137227609973141e-05,
+ "loss": 0.459,
+ "step": 9730
+ },
+ {
+ "epoch": 1.367017543859649,
+ "grad_norm": 1.9942265748977661,
+ "learning_rate": 4.135559949420723e-05,
+ "loss": 0.4794,
+ "step": 9740
+ },
+ {
+ "epoch": 1.368421052631579,
+ "grad_norm": 1.7793415784835815,
+ "learning_rate": 4.133891015528826e-05,
+ "loss": 0.5903,
+ "step": 9750
+ },
+ {
+ "epoch": 1.3698245614035087,
+ "grad_norm": 1.069421410560608,
+ "learning_rate": 4.132220809596772e-05,
+ "loss": 0.5521,
+ "step": 9760
+ },
+ {
+ "epoch": 1.3712280701754387,
+ "grad_norm": 0.8958350419998169,
+ "learning_rate": 4.1305493329248734e-05,
+ "loss": 0.4667,
+ "step": 9770
+ },
+ {
+ "epoch": 1.3726315789473684,
+ "grad_norm": 1.314070701599121,
+ "learning_rate": 4.128876586814433e-05,
+ "loss": 0.4307,
+ "step": 9780
+ },
+ {
+ "epoch": 1.3740350877192982,
+ "grad_norm": 1.3073476552963257,
+ "learning_rate": 4.127202572567741e-05,
+ "loss": 0.5016,
+ "step": 9790
+ },
+ {
+ "epoch": 1.3754385964912281,
+ "grad_norm": 1.695670247077942,
+ "learning_rate": 4.1255272914880735e-05,
+ "loss": 0.5489,
+ "step": 9800
+ },
+ {
+ "epoch": 1.3768421052631579,
+ "grad_norm": 1.6946247816085815,
+ "learning_rate": 4.1238507448796945e-05,
+ "loss": 0.488,
+ "step": 9810
+ },
+ {
+ "epoch": 1.3782456140350878,
+ "grad_norm": 1.3960559368133545,
+ "learning_rate": 4.122172934047855e-05,
+ "loss": 0.5739,
+ "step": 9820
+ },
+ {
+ "epoch": 1.3796491228070176,
+ "grad_norm": 1.4782212972640991,
+ "learning_rate": 4.120493860298786e-05,
+ "loss": 0.5036,
+ "step": 9830
+ },
+ {
+ "epoch": 1.3810526315789473,
+ "grad_norm": 1.7010905742645264,
+ "learning_rate": 4.1188135249397056e-05,
+ "loss": 0.4737,
+ "step": 9840
+ },
+ {
+ "epoch": 1.3824561403508773,
+ "grad_norm": 1.8230018615722656,
+ "learning_rate": 4.117131929278811e-05,
+ "loss": 0.5341,
+ "step": 9850
+ },
+ {
+ "epoch": 1.383859649122807,
+ "grad_norm": 1.5947978496551514,
+ "learning_rate": 4.1154490746252825e-05,
+ "loss": 0.4567,
+ "step": 9860
+ },
+ {
+ "epoch": 1.385263157894737,
+ "grad_norm": 2.077136516571045,
+ "learning_rate": 4.113764962289281e-05,
+ "loss": 0.5586,
+ "step": 9870
+ },
+ {
+ "epoch": 1.3866666666666667,
+ "grad_norm": 1.0836787223815918,
+ "learning_rate": 4.112079593581944e-05,
+ "loss": 0.5065,
+ "step": 9880
+ },
+ {
+ "epoch": 1.3880701754385965,
+ "grad_norm": 1.8127710819244385,
+ "learning_rate": 4.110392969815391e-05,
+ "loss": 0.5335,
+ "step": 9890
+ },
+ {
+ "epoch": 1.3894736842105262,
+ "grad_norm": 1.7939358949661255,
+ "learning_rate": 4.108705092302715e-05,
+ "loss": 0.5445,
+ "step": 9900
+ },
+ {
+ "epoch": 1.3908771929824562,
+ "grad_norm": 1.5963224172592163,
+ "learning_rate": 4.1070159623579855e-05,
+ "loss": 0.5146,
+ "step": 9910
+ },
+ {
+ "epoch": 1.392280701754386,
+ "grad_norm": 1.4976569414138794,
+ "learning_rate": 4.105325581296251e-05,
+ "loss": 0.4938,
+ "step": 9920
+ },
+ {
+ "epoch": 1.3936842105263159,
+ "grad_norm": 1.3835642337799072,
+ "learning_rate": 4.103633950433528e-05,
+ "loss": 0.5353,
+ "step": 9930
+ },
+ {
+ "epoch": 1.3950877192982456,
+ "grad_norm": 1.504701018333435,
+ "learning_rate": 4.1019410710868115e-05,
+ "loss": 0.4869,
+ "step": 9940
+ },
+ {
+ "epoch": 1.3964912280701753,
+ "grad_norm": 1.6695371866226196,
+ "learning_rate": 4.100246944574064e-05,
+ "loss": 0.4858,
+ "step": 9950
+ },
+ {
+ "epoch": 1.3978947368421053,
+ "grad_norm": 1.033554196357727,
+ "learning_rate": 4.098551572214223e-05,
+ "loss": 0.5173,
+ "step": 9960
+ },
+ {
+ "epoch": 1.399298245614035,
+ "grad_norm": 2.1895320415496826,
+ "learning_rate": 4.0968549553271926e-05,
+ "loss": 0.5862,
+ "step": 9970
+ },
+ {
+ "epoch": 1.400701754385965,
+ "grad_norm": 2.323758363723755,
+ "learning_rate": 4.095157095233848e-05,
+ "loss": 0.5312,
+ "step": 9980
+ },
+ {
+ "epoch": 1.4021052631578947,
+ "grad_norm": 1.7012853622436523,
+ "learning_rate": 4.093457993256031e-05,
+ "loss": 0.4668,
+ "step": 9990
+ },
+ {
+ "epoch": 1.4035087719298245,
+ "grad_norm": 1.5631529092788696,
+ "learning_rate": 4.0917576507165514e-05,
+ "loss": 0.5192,
+ "step": 10000
+ },
+ {
+ "epoch": 1.4035087719298245,
+ "eval_loss": 0.6501449942588806,
+ "eval_runtime": 43.9031,
+ "eval_samples_per_second": 34.166,
+ "eval_steps_per_second": 8.542,
+ "step": 10000
+ },
+ {
+ "epoch": 1.4049122807017544,
+ "grad_norm": 1.5768696069717407,
+ "learning_rate": 4.090056068939183e-05,
+ "loss": 0.6369,
+ "step": 10010
+ },
+ {
+ "epoch": 1.4063157894736842,
+ "grad_norm": 2.0955562591552734,
+ "learning_rate": 4.088353249248667e-05,
+ "loss": 0.4765,
+ "step": 10020
+ },
+ {
+ "epoch": 1.4077192982456141,
+ "grad_norm": 1.7173198461532593,
+ "learning_rate": 4.0866491929707064e-05,
+ "loss": 0.4858,
+ "step": 10030
+ },
+ {
+ "epoch": 1.4091228070175439,
+ "grad_norm": 1.091640591621399,
+ "learning_rate": 4.084943901431966e-05,
+ "loss": 0.4502,
+ "step": 10040
+ },
+ {
+ "epoch": 1.4105263157894736,
+ "grad_norm": 0.9591197967529297,
+ "learning_rate": 4.083237375960075e-05,
+ "loss": 0.5036,
+ "step": 10050
+ },
+ {
+ "epoch": 1.4119298245614036,
+ "grad_norm": 1.6289422512054443,
+ "learning_rate": 4.081529617883622e-05,
+ "loss": 0.5185,
+ "step": 10060
+ },
+ {
+ "epoch": 1.4133333333333333,
+ "grad_norm": 1.0051218271255493,
+ "learning_rate": 4.079820628532155e-05,
+ "loss": 0.4701,
+ "step": 10070
+ },
+ {
+ "epoch": 1.4147368421052633,
+ "grad_norm": 1.5907773971557617,
+ "learning_rate": 4.0781104092361813e-05,
+ "loss": 0.5406,
+ "step": 10080
+ },
+ {
+ "epoch": 1.416140350877193,
+ "grad_norm": 1.4398341178894043,
+ "learning_rate": 4.0763989613271635e-05,
+ "loss": 0.4963,
+ "step": 10090
+ },
+ {
+ "epoch": 1.4175438596491228,
+ "grad_norm": 1.8131810426712036,
+ "learning_rate": 4.0746862861375245e-05,
+ "loss": 0.5931,
+ "step": 10100
+ },
+ {
+ "epoch": 1.4189473684210525,
+ "grad_norm": 1.3968654870986938,
+ "learning_rate": 4.07297238500064e-05,
+ "loss": 0.4908,
+ "step": 10110
+ },
+ {
+ "epoch": 1.4203508771929825,
+ "grad_norm": 1.4002443552017212,
+ "learning_rate": 4.0712572592508394e-05,
+ "loss": 0.5732,
+ "step": 10120
+ },
+ {
+ "epoch": 1.4217543859649122,
+ "grad_norm": 1.8497573137283325,
+ "learning_rate": 4.069540910223409e-05,
+ "loss": 0.5323,
+ "step": 10130
+ },
+ {
+ "epoch": 1.4231578947368422,
+ "grad_norm": 1.6966348886489868,
+ "learning_rate": 4.067823339254584e-05,
+ "loss": 0.5727,
+ "step": 10140
+ },
+ {
+ "epoch": 1.424561403508772,
+ "grad_norm": 1.1128507852554321,
+ "learning_rate": 4.066104547681553e-05,
+ "loss": 0.5295,
+ "step": 10150
+ },
+ {
+ "epoch": 1.4259649122807017,
+ "grad_norm": 1.2667880058288574,
+ "learning_rate": 4.0643845368424545e-05,
+ "loss": 0.554,
+ "step": 10160
+ },
+ {
+ "epoch": 1.4273684210526316,
+ "grad_norm": 2.0188159942626953,
+ "learning_rate": 4.062663308076374e-05,
+ "loss": 0.5138,
+ "step": 10170
+ },
+ {
+ "epoch": 1.4287719298245614,
+ "grad_norm": 1.113797903060913,
+ "learning_rate": 4.0609408627233494e-05,
+ "loss": 0.543,
+ "step": 10180
+ },
+ {
+ "epoch": 1.4301754385964913,
+ "grad_norm": 1.800862193107605,
+ "learning_rate": 4.059217202124361e-05,
+ "loss": 0.5094,
+ "step": 10190
+ },
+ {
+ "epoch": 1.431578947368421,
+ "grad_norm": 1.7962946891784668,
+ "learning_rate": 4.0574923276213405e-05,
+ "loss": 0.5468,
+ "step": 10200
+ },
+ {
+ "epoch": 1.4329824561403508,
+ "grad_norm": 2.258661985397339,
+ "learning_rate": 4.0557662405571595e-05,
+ "loss": 0.6082,
+ "step": 10210
+ },
+ {
+ "epoch": 1.4343859649122808,
+ "grad_norm": 2.073396921157837,
+ "learning_rate": 4.054038942275637e-05,
+ "loss": 0.5164,
+ "step": 10220
+ },
+ {
+ "epoch": 1.4357894736842105,
+ "grad_norm": 1.6468226909637451,
+ "learning_rate": 4.052310434121533e-05,
+ "loss": 0.5451,
+ "step": 10230
+ },
+ {
+ "epoch": 1.4371929824561405,
+ "grad_norm": 2.4547080993652344,
+ "learning_rate": 4.050580717440552e-05,
+ "loss": 0.5821,
+ "step": 10240
+ },
+ {
+ "epoch": 1.4385964912280702,
+ "grad_norm": 1.6606553792953491,
+ "learning_rate": 4.048849793579337e-05,
+ "loss": 0.5088,
+ "step": 10250
+ },
+ {
+ "epoch": 1.44,
+ "grad_norm": 1.6139086484909058,
+ "learning_rate": 4.04711766388547e-05,
+ "loss": 0.5441,
+ "step": 10260
+ },
+ {
+ "epoch": 1.4414035087719297,
+ "grad_norm": 1.6367645263671875,
+ "learning_rate": 4.0453843297074756e-05,
+ "loss": 0.494,
+ "step": 10270
+ },
+ {
+ "epoch": 1.4428070175438596,
+ "grad_norm": 1.6214492321014404,
+ "learning_rate": 4.043649792394812e-05,
+ "loss": 0.571,
+ "step": 10280
+ },
+ {
+ "epoch": 1.4442105263157896,
+ "grad_norm": 1.8566523790359497,
+ "learning_rate": 4.041914053297878e-05,
+ "loss": 0.5845,
+ "step": 10290
+ },
+ {
+ "epoch": 1.4456140350877194,
+ "grad_norm": 1.105668067932129,
+ "learning_rate": 4.0401771137680046e-05,
+ "loss": 0.4655,
+ "step": 10300
+ },
+ {
+ "epoch": 1.447017543859649,
+ "grad_norm": 1.5444446802139282,
+ "learning_rate": 4.038438975157458e-05,
+ "loss": 0.4939,
+ "step": 10310
+ },
+ {
+ "epoch": 1.4484210526315788,
+ "grad_norm": 2.6764674186706543,
+ "learning_rate": 4.036699638819441e-05,
+ "loss": 0.6172,
+ "step": 10320
+ },
+ {
+ "epoch": 1.4498245614035088,
+ "grad_norm": 1.574623942375183,
+ "learning_rate": 4.0349591061080846e-05,
+ "loss": 0.4888,
+ "step": 10330
+ },
+ {
+ "epoch": 1.4512280701754385,
+ "grad_norm": 2.2457685470581055,
+ "learning_rate": 4.0332173783784536e-05,
+ "loss": 0.4427,
+ "step": 10340
+ },
+ {
+ "epoch": 1.4526315789473685,
+ "grad_norm": 1.68437659740448,
+ "learning_rate": 4.031474456986543e-05,
+ "loss": 0.4867,
+ "step": 10350
+ },
+ {
+ "epoch": 1.4540350877192982,
+ "grad_norm": 1.4421491622924805,
+ "learning_rate": 4.0297303432892775e-05,
+ "loss": 0.4401,
+ "step": 10360
+ },
+ {
+ "epoch": 1.455438596491228,
+ "grad_norm": 2.1541783809661865,
+ "learning_rate": 4.027985038644507e-05,
+ "loss": 0.546,
+ "step": 10370
+ },
+ {
+ "epoch": 1.456842105263158,
+ "grad_norm": 1.7601039409637451,
+ "learning_rate": 4.026238544411014e-05,
+ "loss": 0.5211,
+ "step": 10380
+ },
+ {
+ "epoch": 1.4582456140350877,
+ "grad_norm": 1.2212331295013428,
+ "learning_rate": 4.024490861948503e-05,
+ "loss": 0.4633,
+ "step": 10390
+ },
+ {
+ "epoch": 1.4596491228070176,
+ "grad_norm": 1.688339114189148,
+ "learning_rate": 4.022741992617603e-05,
+ "loss": 0.5898,
+ "step": 10400
+ },
+ {
+ "epoch": 1.4610526315789474,
+ "grad_norm": 0.9874732494354248,
+ "learning_rate": 4.020991937779872e-05,
+ "loss": 0.4944,
+ "step": 10410
+ },
+ {
+ "epoch": 1.4624561403508771,
+ "grad_norm": 0.9479324817657471,
+ "learning_rate": 4.019240698797785e-05,
+ "loss": 0.55,
+ "step": 10420
+ },
+ {
+ "epoch": 1.463859649122807,
+ "grad_norm": 2.4362101554870605,
+ "learning_rate": 4.017488277034742e-05,
+ "loss": 0.5103,
+ "step": 10430
+ },
+ {
+ "epoch": 1.4652631578947368,
+ "grad_norm": 1.659631371498108,
+ "learning_rate": 4.015734673855065e-05,
+ "loss": 0.5073,
+ "step": 10440
+ },
+ {
+ "epoch": 1.4666666666666668,
+ "grad_norm": 1.9145371913909912,
+ "learning_rate": 4.013979890623992e-05,
+ "loss": 0.5588,
+ "step": 10450
+ },
+ {
+ "epoch": 1.4680701754385965,
+ "grad_norm": 1.3370527029037476,
+ "learning_rate": 4.0122239287076834e-05,
+ "loss": 0.5984,
+ "step": 10460
+ },
+ {
+ "epoch": 1.4694736842105263,
+ "grad_norm": 1.9445977210998535,
+ "learning_rate": 4.010466789473215e-05,
+ "loss": 0.5437,
+ "step": 10470
+ },
+ {
+ "epoch": 1.470877192982456,
+ "grad_norm": 1.197405457496643,
+ "learning_rate": 4.008708474288581e-05,
+ "loss": 0.4573,
+ "step": 10480
+ },
+ {
+ "epoch": 1.472280701754386,
+ "grad_norm": 1.8886786699295044,
+ "learning_rate": 4.006948984522687e-05,
+ "loss": 0.5319,
+ "step": 10490
+ },
+ {
+ "epoch": 1.4736842105263157,
+ "grad_norm": 1.8042242527008057,
+ "learning_rate": 4.00518832154536e-05,
+ "loss": 0.4559,
+ "step": 10500
+ },
+ {
+ "epoch": 1.4750877192982457,
+ "grad_norm": 1.6905839443206787,
+ "learning_rate": 4.003426486727335e-05,
+ "loss": 0.4776,
+ "step": 10510
+ },
+ {
+ "epoch": 1.4764912280701754,
+ "grad_norm": 1.3357295989990234,
+ "learning_rate": 4.00166348144026e-05,
+ "loss": 0.5672,
+ "step": 10520
+ },
+ {
+ "epoch": 1.4778947368421052,
+ "grad_norm": 1.399383783340454,
+ "learning_rate": 3.9998993070566954e-05,
+ "loss": 0.5926,
+ "step": 10530
+ },
+ {
+ "epoch": 1.4792982456140351,
+ "grad_norm": 1.8520910739898682,
+ "learning_rate": 3.998133964950112e-05,
+ "loss": 0.4975,
+ "step": 10540
+ },
+ {
+ "epoch": 1.4807017543859649,
+ "grad_norm": 0.865352988243103,
+ "learning_rate": 3.9963674564948886e-05,
+ "loss": 0.5229,
+ "step": 10550
+ },
+ {
+ "epoch": 1.4821052631578948,
+ "grad_norm": 2.3526158332824707,
+ "learning_rate": 3.9945997830663126e-05,
+ "loss": 0.4995,
+ "step": 10560
+ },
+ {
+ "epoch": 1.4835087719298246,
+ "grad_norm": 2.1046128273010254,
+ "learning_rate": 3.992830946040579e-05,
+ "loss": 0.5464,
+ "step": 10570
+ },
+ {
+ "epoch": 1.4849122807017543,
+ "grad_norm": 2.0516717433929443,
+ "learning_rate": 3.9910609467947866e-05,
+ "loss": 0.5691,
+ "step": 10580
+ },
+ {
+ "epoch": 1.4863157894736843,
+ "grad_norm": 1.386889100074768,
+ "learning_rate": 3.989289786706942e-05,
+ "loss": 0.5982,
+ "step": 10590
+ },
+ {
+ "epoch": 1.487719298245614,
+ "grad_norm": 1.7038406133651733,
+ "learning_rate": 3.987517467155954e-05,
+ "loss": 0.5173,
+ "step": 10600
+ },
+ {
+ "epoch": 1.489122807017544,
+ "grad_norm": 1.3720016479492188,
+ "learning_rate": 3.985743989521633e-05,
+ "loss": 0.4406,
+ "step": 10610
+ },
+ {
+ "epoch": 1.4905263157894737,
+ "grad_norm": 1.5871185064315796,
+ "learning_rate": 3.9839693551846924e-05,
+ "loss": 0.5417,
+ "step": 10620
+ },
+ {
+ "epoch": 1.4919298245614034,
+ "grad_norm": 1.440131664276123,
+ "learning_rate": 3.982193565526747e-05,
+ "loss": 0.6226,
+ "step": 10630
+ },
+ {
+ "epoch": 1.4933333333333334,
+ "grad_norm": 1.91805100440979,
+ "learning_rate": 3.9804166219303086e-05,
+ "loss": 0.5337,
+ "step": 10640
+ },
+ {
+ "epoch": 1.4947368421052631,
+ "grad_norm": 1.9551035165786743,
+ "learning_rate": 3.9786385257787886e-05,
+ "loss": 0.5027,
+ "step": 10650
+ },
+ {
+ "epoch": 1.496140350877193,
+ "grad_norm": 1.93511962890625,
+ "learning_rate": 3.9768592784564974e-05,
+ "loss": 0.5676,
+ "step": 10660
+ },
+ {
+ "epoch": 1.4975438596491228,
+ "grad_norm": 1.6610795259475708,
+ "learning_rate": 3.975078881348638e-05,
+ "loss": 0.4542,
+ "step": 10670
+ },
+ {
+ "epoch": 1.4989473684210526,
+ "grad_norm": 1.6202747821807861,
+ "learning_rate": 3.9732973358413115e-05,
+ "loss": 0.5563,
+ "step": 10680
+ },
+ {
+ "epoch": 1.5003508771929823,
+ "grad_norm": 1.8837049007415771,
+ "learning_rate": 3.971514643321513e-05,
+ "loss": 0.4128,
+ "step": 10690
+ },
+ {
+ "epoch": 1.5017543859649123,
+ "grad_norm": 1.5047054290771484,
+ "learning_rate": 3.969730805177129e-05,
+ "loss": 0.4909,
+ "step": 10700
+ },
+ {
+ "epoch": 1.5031578947368422,
+ "grad_norm": 1.5055679082870483,
+ "learning_rate": 3.967945822796938e-05,
+ "loss": 0.5664,
+ "step": 10710
+ },
+ {
+ "epoch": 1.504561403508772,
+ "grad_norm": 1.2587159872055054,
+ "learning_rate": 3.9661596975706104e-05,
+ "loss": 0.5827,
+ "step": 10720
+ },
+ {
+ "epoch": 1.5059649122807017,
+ "grad_norm": 1.2444645166397095,
+ "learning_rate": 3.9643724308887065e-05,
+ "loss": 0.5105,
+ "step": 10730
+ },
+ {
+ "epoch": 1.5073684210526315,
+ "grad_norm": 2.214508295059204,
+ "learning_rate": 3.962584024142675e-05,
+ "loss": 0.5455,
+ "step": 10740
+ },
+ {
+ "epoch": 1.5087719298245614,
+ "grad_norm": 1.7483428716659546,
+ "learning_rate": 3.96079447872485e-05,
+ "loss": 0.5419,
+ "step": 10750
+ },
+ {
+ "epoch": 1.5101754385964914,
+ "grad_norm": 2.044471263885498,
+ "learning_rate": 3.9590037960284546e-05,
+ "loss": 0.5861,
+ "step": 10760
+ },
+ {
+ "epoch": 1.5115789473684211,
+ "grad_norm": 2.063427209854126,
+ "learning_rate": 3.9572119774475975e-05,
+ "loss": 0.5692,
+ "step": 10770
+ },
+ {
+ "epoch": 1.5129824561403509,
+ "grad_norm": 2.0187671184539795,
+ "learning_rate": 3.95541902437727e-05,
+ "loss": 0.5345,
+ "step": 10780
+ },
+ {
+ "epoch": 1.5143859649122806,
+ "grad_norm": 1.9781345129013062,
+ "learning_rate": 3.953624938213348e-05,
+ "loss": 0.5212,
+ "step": 10790
+ },
+ {
+ "epoch": 1.5157894736842106,
+ "grad_norm": 1.9213941097259521,
+ "learning_rate": 3.95182972035259e-05,
+ "loss": 0.4838,
+ "step": 10800
+ },
+ {
+ "epoch": 1.5171929824561403,
+ "grad_norm": 2.619076728820801,
+ "learning_rate": 3.950033372192633e-05,
+ "loss": 0.5011,
+ "step": 10810
+ },
+ {
+ "epoch": 1.5185964912280703,
+ "grad_norm": 1.8988882303237915,
+ "learning_rate": 3.948235895131997e-05,
+ "loss": 0.5043,
+ "step": 10820
+ },
+ {
+ "epoch": 1.52,
+ "grad_norm": 1.4304900169372559,
+ "learning_rate": 3.946437290570078e-05,
+ "loss": 0.5062,
+ "step": 10830
+ },
+ {
+ "epoch": 1.5214035087719298,
+ "grad_norm": 3.060408353805542,
+ "learning_rate": 3.944637559907152e-05,
+ "loss": 0.6164,
+ "step": 10840
+ },
+ {
+ "epoch": 1.5228070175438595,
+ "grad_norm": 0.9590080976486206,
+ "learning_rate": 3.9428367045443704e-05,
+ "loss": 0.5159,
+ "step": 10850
+ },
+ {
+ "epoch": 1.5242105263157895,
+ "grad_norm": 2.1461575031280518,
+ "learning_rate": 3.941034725883762e-05,
+ "loss": 0.6505,
+ "step": 10860
+ },
+ {
+ "epoch": 1.5256140350877194,
+ "grad_norm": 1.578477144241333,
+ "learning_rate": 3.939231625328229e-05,
+ "loss": 0.4808,
+ "step": 10870
+ },
+ {
+ "epoch": 1.5270175438596492,
+ "grad_norm": 1.8510093688964844,
+ "learning_rate": 3.9374274042815465e-05,
+ "loss": 0.5194,
+ "step": 10880
+ },
+ {
+ "epoch": 1.528421052631579,
+ "grad_norm": 1.9132167100906372,
+ "learning_rate": 3.935622064148361e-05,
+ "loss": 0.5079,
+ "step": 10890
+ },
+ {
+ "epoch": 1.5298245614035086,
+ "grad_norm": 1.3352388143539429,
+ "learning_rate": 3.9338156063341946e-05,
+ "loss": 0.4808,
+ "step": 10900
+ },
+ {
+ "epoch": 1.5312280701754386,
+ "grad_norm": 2.102167844772339,
+ "learning_rate": 3.932008032245434e-05,
+ "loss": 0.429,
+ "step": 10910
+ },
+ {
+ "epoch": 1.5326315789473686,
+ "grad_norm": 1.9585574865341187,
+ "learning_rate": 3.930199343289339e-05,
+ "loss": 0.489,
+ "step": 10920
+ },
+ {
+ "epoch": 1.5340350877192983,
+ "grad_norm": 1.905050277709961,
+ "learning_rate": 3.9283895408740355e-05,
+ "loss": 0.4881,
+ "step": 10930
+ },
+ {
+ "epoch": 1.535438596491228,
+ "grad_norm": 1.964416742324829,
+ "learning_rate": 3.926578626408517e-05,
+ "loss": 0.5913,
+ "step": 10940
+ },
+ {
+ "epoch": 1.5368421052631578,
+ "grad_norm": 2.3363118171691895,
+ "learning_rate": 3.924766601302642e-05,
+ "loss": 0.4719,
+ "step": 10950
+ },
+ {
+ "epoch": 1.5382456140350877,
+ "grad_norm": 1.7316786050796509,
+ "learning_rate": 3.9229534669671344e-05,
+ "loss": 0.5445,
+ "step": 10960
+ },
+ {
+ "epoch": 1.5396491228070175,
+ "grad_norm": 2.2813808917999268,
+ "learning_rate": 3.9211392248135815e-05,
+ "loss": 0.4989,
+ "step": 10970
+ },
+ {
+ "epoch": 1.5410526315789475,
+ "grad_norm": 0.9021309018135071,
+ "learning_rate": 3.9193238762544325e-05,
+ "loss": 0.5321,
+ "step": 10980
+ },
+ {
+ "epoch": 1.5424561403508772,
+ "grad_norm": 2.614776134490967,
+ "learning_rate": 3.9175074227029996e-05,
+ "loss": 0.4765,
+ "step": 10990
+ },
+ {
+ "epoch": 1.543859649122807,
+ "grad_norm": 2.1491498947143555,
+ "learning_rate": 3.915689865573454e-05,
+ "loss": 0.5748,
+ "step": 11000
+ },
+ {
+ "epoch": 1.545263157894737,
+ "grad_norm": 1.7609467506408691,
+ "learning_rate": 3.913871206280824e-05,
+ "loss": 0.7091,
+ "step": 11010
+ },
+ {
+ "epoch": 1.5466666666666666,
+ "grad_norm": 2.5683560371398926,
+ "learning_rate": 3.912051446241001e-05,
+ "loss": 0.507,
+ "step": 11020
+ },
+ {
+ "epoch": 1.5480701754385966,
+ "grad_norm": 1.3458114862442017,
+ "learning_rate": 3.910230586870729e-05,
+ "loss": 0.5738,
+ "step": 11030
+ },
+ {
+ "epoch": 1.5494736842105263,
+ "grad_norm": 1.7107462882995605,
+ "learning_rate": 3.90840862958761e-05,
+ "loss": 0.4437,
+ "step": 11040
+ },
+ {
+ "epoch": 1.550877192982456,
+ "grad_norm": 1.6637877225875854,
+ "learning_rate": 3.9065855758101e-05,
+ "loss": 0.4859,
+ "step": 11050
+ },
+ {
+ "epoch": 1.5522807017543858,
+ "grad_norm": 2.1268763542175293,
+ "learning_rate": 3.904761426957509e-05,
+ "loss": 0.5433,
+ "step": 11060
+ },
+ {
+ "epoch": 1.5536842105263158,
+ "grad_norm": 1.8485718965530396,
+ "learning_rate": 3.902936184449999e-05,
+ "loss": 0.5938,
+ "step": 11070
+ },
+ {
+ "epoch": 1.5550877192982457,
+ "grad_norm": 1.9369820356369019,
+ "learning_rate": 3.901109849708585e-05,
+ "loss": 0.4484,
+ "step": 11080
+ },
+ {
+ "epoch": 1.5564912280701755,
+ "grad_norm": 1.5223256349563599,
+ "learning_rate": 3.8992824241551295e-05,
+ "loss": 0.4353,
+ "step": 11090
+ },
+ {
+ "epoch": 1.5578947368421052,
+ "grad_norm": 1.2845451831817627,
+ "learning_rate": 3.897453909212348e-05,
+ "loss": 0.4497,
+ "step": 11100
+ },
+ {
+ "epoch": 1.559298245614035,
+ "grad_norm": 1.2751349210739136,
+ "learning_rate": 3.895624306303799e-05,
+ "loss": 0.4648,
+ "step": 11110
+ },
+ {
+ "epoch": 1.560701754385965,
+ "grad_norm": 1.6384958028793335,
+ "learning_rate": 3.893793616853894e-05,
+ "loss": 0.5921,
+ "step": 11120
+ },
+ {
+ "epoch": 1.5621052631578949,
+ "grad_norm": 1.817355990409851,
+ "learning_rate": 3.891961842287886e-05,
+ "loss": 0.4611,
+ "step": 11130
+ },
+ {
+ "epoch": 1.5635087719298246,
+ "grad_norm": 1.7115503549575806,
+ "learning_rate": 3.890128984031876e-05,
+ "loss": 0.4745,
+ "step": 11140
+ },
+ {
+ "epoch": 1.5649122807017544,
+ "grad_norm": 1.7166131734848022,
+ "learning_rate": 3.888295043512804e-05,
+ "loss": 0.5716,
+ "step": 11150
+ },
+ {
+ "epoch": 1.566315789473684,
+ "grad_norm": 1.8528428077697754,
+ "learning_rate": 3.886460022158458e-05,
+ "loss": 0.5193,
+ "step": 11160
+ },
+ {
+ "epoch": 1.567719298245614,
+ "grad_norm": 1.9985193014144897,
+ "learning_rate": 3.884623921397463e-05,
+ "loss": 0.4974,
+ "step": 11170
+ },
+ {
+ "epoch": 1.5691228070175438,
+ "grad_norm": 1.4072109460830688,
+ "learning_rate": 3.882786742659289e-05,
+ "loss": 0.4418,
+ "step": 11180
+ },
+ {
+ "epoch": 1.5705263157894738,
+ "grad_norm": 1.3553410768508911,
+ "learning_rate": 3.880948487374241e-05,
+ "loss": 0.5278,
+ "step": 11190
+ },
+ {
+ "epoch": 1.5719298245614035,
+ "grad_norm": 1.6441354751586914,
+ "learning_rate": 3.8791091569734625e-05,
+ "loss": 0.476,
+ "step": 11200
+ },
+ {
+ "epoch": 1.5733333333333333,
+ "grad_norm": 1.4078179597854614,
+ "learning_rate": 3.8772687528889385e-05,
+ "loss": 0.581,
+ "step": 11210
+ },
+ {
+ "epoch": 1.5747368421052632,
+ "grad_norm": 2.096179723739624,
+ "learning_rate": 3.875427276553485e-05,
+ "loss": 0.5076,
+ "step": 11220
+ },
+ {
+ "epoch": 1.576140350877193,
+ "grad_norm": 1.3418902158737183,
+ "learning_rate": 3.873584729400753e-05,
+ "loss": 0.5177,
+ "step": 11230
+ },
+ {
+ "epoch": 1.577543859649123,
+ "grad_norm": 2.1806328296661377,
+ "learning_rate": 3.8717411128652304e-05,
+ "loss": 0.5348,
+ "step": 11240
+ },
+ {
+ "epoch": 1.5789473684210527,
+ "grad_norm": 1.6312358379364014,
+ "learning_rate": 3.869896428382236e-05,
+ "loss": 0.4699,
+ "step": 11250
+ },
+ {
+ "epoch": 1.5803508771929824,
+ "grad_norm": 2.4073407649993896,
+ "learning_rate": 3.8680506773879184e-05,
+ "loss": 0.5403,
+ "step": 11260
+ },
+ {
+ "epoch": 1.5817543859649121,
+ "grad_norm": 2.2572133541107178,
+ "learning_rate": 3.8662038613192596e-05,
+ "loss": 0.4982,
+ "step": 11270
+ },
+ {
+ "epoch": 1.583157894736842,
+ "grad_norm": 1.9021124839782715,
+ "learning_rate": 3.8643559816140685e-05,
+ "loss": 0.5025,
+ "step": 11280
+ },
+ {
+ "epoch": 1.584561403508772,
+ "grad_norm": 1.5274001359939575,
+ "learning_rate": 3.862507039710982e-05,
+ "loss": 0.4716,
+ "step": 11290
+ },
+ {
+ "epoch": 1.5859649122807018,
+ "grad_norm": 1.0260459184646606,
+ "learning_rate": 3.860657037049466e-05,
+ "loss": 0.5378,
+ "step": 11300
+ },
+ {
+ "epoch": 1.5873684210526315,
+ "grad_norm": 2.2560298442840576,
+ "learning_rate": 3.85880597506981e-05,
+ "loss": 0.5017,
+ "step": 11310
+ },
+ {
+ "epoch": 1.5887719298245613,
+ "grad_norm": 1.3712495565414429,
+ "learning_rate": 3.856953855213131e-05,
+ "loss": 0.4612,
+ "step": 11320
+ },
+ {
+ "epoch": 1.5901754385964912,
+ "grad_norm": 1.3681050539016724,
+ "learning_rate": 3.855100678921365e-05,
+ "loss": 0.5077,
+ "step": 11330
+ },
+ {
+ "epoch": 1.5915789473684212,
+ "grad_norm": 2.0733203887939453,
+ "learning_rate": 3.8532464476372765e-05,
+ "loss": 0.5643,
+ "step": 11340
+ },
+ {
+ "epoch": 1.592982456140351,
+ "grad_norm": 2.131699323654175,
+ "learning_rate": 3.851391162804445e-05,
+ "loss": 0.4939,
+ "step": 11350
+ },
+ {
+ "epoch": 1.5943859649122807,
+ "grad_norm": 1.787874698638916,
+ "learning_rate": 3.849534825867275e-05,
+ "loss": 0.5191,
+ "step": 11360
+ },
+ {
+ "epoch": 1.5957894736842104,
+ "grad_norm": 1.7359322309494019,
+ "learning_rate": 3.847677438270988e-05,
+ "loss": 0.5361,
+ "step": 11370
+ },
+ {
+ "epoch": 1.5971929824561404,
+ "grad_norm": 1.7936285734176636,
+ "learning_rate": 3.845819001461625e-05,
+ "loss": 0.5005,
+ "step": 11380
+ },
+ {
+ "epoch": 1.5985964912280701,
+ "grad_norm": 0.9876174330711365,
+ "learning_rate": 3.8439595168860406e-05,
+ "loss": 0.491,
+ "step": 11390
+ },
+ {
+ "epoch": 1.6,
+ "grad_norm": 1.5902925729751587,
+ "learning_rate": 3.842098985991909e-05,
+ "loss": 0.5636,
+ "step": 11400
+ },
+ {
+ "epoch": 1.6014035087719298,
+ "grad_norm": 2.157257556915283,
+ "learning_rate": 3.840237410227717e-05,
+ "loss": 0.5482,
+ "step": 11410
+ },
+ {
+ "epoch": 1.6028070175438596,
+ "grad_norm": 2.4455907344818115,
+ "learning_rate": 3.838374791042764e-05,
+ "loss": 0.4854,
+ "step": 11420
+ },
+ {
+ "epoch": 1.6042105263157893,
+ "grad_norm": 2.3983774185180664,
+ "learning_rate": 3.8365111298871645e-05,
+ "loss": 0.5535,
+ "step": 11430
+ },
+ {
+ "epoch": 1.6056140350877193,
+ "grad_norm": 1.6250687837600708,
+ "learning_rate": 3.834646428211841e-05,
+ "loss": 0.5493,
+ "step": 11440
+ },
+ {
+ "epoch": 1.6070175438596492,
+ "grad_norm": 0.9640924334526062,
+ "learning_rate": 3.83278068746853e-05,
+ "loss": 0.5126,
+ "step": 11450
+ },
+ {
+ "epoch": 1.608421052631579,
+ "grad_norm": 1.3491884469985962,
+ "learning_rate": 3.830913909109772e-05,
+ "loss": 0.5692,
+ "step": 11460
+ },
+ {
+ "epoch": 1.6098245614035087,
+ "grad_norm": 1.2153112888336182,
+ "learning_rate": 3.8290460945889186e-05,
+ "loss": 0.4367,
+ "step": 11470
+ },
+ {
+ "epoch": 1.6112280701754385,
+ "grad_norm": 1.869314193725586,
+ "learning_rate": 3.827177245360129e-05,
+ "loss": 0.5275,
+ "step": 11480
+ },
+ {
+ "epoch": 1.6126315789473684,
+ "grad_norm": 2.1873159408569336,
+ "learning_rate": 3.825307362878364e-05,
+ "loss": 0.5663,
+ "step": 11490
+ },
+ {
+ "epoch": 1.6140350877192984,
+ "grad_norm": 1.1323574781417847,
+ "learning_rate": 3.823436448599393e-05,
+ "loss": 0.4986,
+ "step": 11500
+ },
+ {
+ "epoch": 1.6154385964912281,
+ "grad_norm": 1.424310326576233,
+ "learning_rate": 3.8215645039797874e-05,
+ "loss": 0.5401,
+ "step": 11510
+ },
+ {
+ "epoch": 1.6168421052631579,
+ "grad_norm": 1.8369444608688354,
+ "learning_rate": 3.8196915304769184e-05,
+ "loss": 0.534,
+ "step": 11520
+ },
+ {
+ "epoch": 1.6182456140350876,
+ "grad_norm": 1.404891848564148,
+ "learning_rate": 3.817817529548962e-05,
+ "loss": 0.457,
+ "step": 11530
+ },
+ {
+ "epoch": 1.6196491228070176,
+ "grad_norm": 1.5708239078521729,
+ "learning_rate": 3.815942502654889e-05,
+ "loss": 0.5023,
+ "step": 11540
+ },
+ {
+ "epoch": 1.6210526315789475,
+ "grad_norm": 1.4378736019134521,
+ "learning_rate": 3.8140664512544746e-05,
+ "loss": 0.4885,
+ "step": 11550
+ },
+ {
+ "epoch": 1.6224561403508773,
+ "grad_norm": 2.0573270320892334,
+ "learning_rate": 3.8121893768082896e-05,
+ "loss": 0.5204,
+ "step": 11560
+ },
+ {
+ "epoch": 1.623859649122807,
+ "grad_norm": 1.4034109115600586,
+ "learning_rate": 3.8103112807776986e-05,
+ "loss": 0.4611,
+ "step": 11570
+ },
+ {
+ "epoch": 1.6252631578947367,
+ "grad_norm": 1.8642430305480957,
+ "learning_rate": 3.8084321646248654e-05,
+ "loss": 0.4999,
+ "step": 11580
+ },
+ {
+ "epoch": 1.6266666666666667,
+ "grad_norm": 2.0132107734680176,
+ "learning_rate": 3.806552029812747e-05,
+ "loss": 0.5241,
+ "step": 11590
+ },
+ {
+ "epoch": 1.6280701754385964,
+ "grad_norm": 1.3874375820159912,
+ "learning_rate": 3.804670877805091e-05,
+ "loss": 0.5275,
+ "step": 11600
+ },
+ {
+ "epoch": 1.6294736842105264,
+ "grad_norm": 1.296034574508667,
+ "learning_rate": 3.802788710066439e-05,
+ "loss": 0.4517,
+ "step": 11610
+ },
+ {
+ "epoch": 1.6308771929824561,
+ "grad_norm": 1.7629979848861694,
+ "learning_rate": 3.800905528062123e-05,
+ "loss": 0.4437,
+ "step": 11620
+ },
+ {
+ "epoch": 1.6322807017543859,
+ "grad_norm": 1.788439393043518,
+ "learning_rate": 3.7990213332582665e-05,
+ "loss": 0.5334,
+ "step": 11630
+ },
+ {
+ "epoch": 1.6336842105263156,
+ "grad_norm": 1.6043287515640259,
+ "learning_rate": 3.7971361271217775e-05,
+ "loss": 0.5915,
+ "step": 11640
+ },
+ {
+ "epoch": 1.6350877192982456,
+ "grad_norm": 1.1127432584762573,
+ "learning_rate": 3.7952499111203544e-05,
+ "loss": 0.633,
+ "step": 11650
+ },
+ {
+ "epoch": 1.6364912280701756,
+ "grad_norm": 1.6562187671661377,
+ "learning_rate": 3.793362686722483e-05,
+ "loss": 0.523,
+ "step": 11660
+ },
+ {
+ "epoch": 1.6378947368421053,
+ "grad_norm": 2.0622971057891846,
+ "learning_rate": 3.7914744553974284e-05,
+ "loss": 0.5025,
+ "step": 11670
+ },
+ {
+ "epoch": 1.639298245614035,
+ "grad_norm": 2.0334134101867676,
+ "learning_rate": 3.789585218615246e-05,
+ "loss": 0.5153,
+ "step": 11680
+ },
+ {
+ "epoch": 1.6407017543859648,
+ "grad_norm": 1.7830958366394043,
+ "learning_rate": 3.787694977846771e-05,
+ "loss": 0.5783,
+ "step": 11690
+ },
+ {
+ "epoch": 1.6421052631578947,
+ "grad_norm": 2.2819151878356934,
+ "learning_rate": 3.78580373456362e-05,
+ "loss": 0.5333,
+ "step": 11700
+ },
+ {
+ "epoch": 1.6435087719298247,
+ "grad_norm": 1.7580994367599487,
+ "learning_rate": 3.783911490238191e-05,
+ "loss": 0.574,
+ "step": 11710
+ },
+ {
+ "epoch": 1.6449122807017544,
+ "grad_norm": 1.012489676475525,
+ "learning_rate": 3.782018246343661e-05,
+ "loss": 0.5028,
+ "step": 11720
+ },
+ {
+ "epoch": 1.6463157894736842,
+ "grad_norm": 0.9846917390823364,
+ "learning_rate": 3.780124004353987e-05,
+ "loss": 0.5425,
+ "step": 11730
+ },
+ {
+ "epoch": 1.647719298245614,
+ "grad_norm": 1.3875446319580078,
+ "learning_rate": 3.778228765743898e-05,
+ "loss": 0.4961,
+ "step": 11740
+ },
+ {
+ "epoch": 1.6491228070175439,
+ "grad_norm": 0.912992537021637,
+ "learning_rate": 3.776332531988903e-05,
+ "loss": 0.5135,
+ "step": 11750
+ },
+ {
+ "epoch": 1.6505263157894738,
+ "grad_norm": 1.4657293558120728,
+ "learning_rate": 3.774435304565288e-05,
+ "loss": 0.5917,
+ "step": 11760
+ },
+ {
+ "epoch": 1.6519298245614036,
+ "grad_norm": 1.4134496450424194,
+ "learning_rate": 3.772537084950106e-05,
+ "loss": 0.6529,
+ "step": 11770
+ },
+ {
+ "epoch": 1.6533333333333333,
+ "grad_norm": 1.0357835292816162,
+ "learning_rate": 3.770637874621189e-05,
+ "loss": 0.4853,
+ "step": 11780
+ },
+ {
+ "epoch": 1.654736842105263,
+ "grad_norm": 1.1166404485702515,
+ "learning_rate": 3.7687376750571347e-05,
+ "loss": 0.5509,
+ "step": 11790
+ },
+ {
+ "epoch": 1.656140350877193,
+ "grad_norm": 1.819243311882019,
+ "learning_rate": 3.7668364877373154e-05,
+ "loss": 0.5083,
+ "step": 11800
+ },
+ {
+ "epoch": 1.6575438596491228,
+ "grad_norm": 2.0793590545654297,
+ "learning_rate": 3.764934314141869e-05,
+ "loss": 0.5239,
+ "step": 11810
+ },
+ {
+ "epoch": 1.6589473684210527,
+ "grad_norm": 1.4497408866882324,
+ "learning_rate": 3.763031155751705e-05,
+ "loss": 0.5295,
+ "step": 11820
+ },
+ {
+ "epoch": 1.6603508771929825,
+ "grad_norm": 1.8401798009872437,
+ "learning_rate": 3.7611270140484956e-05,
+ "loss": 0.3987,
+ "step": 11830
+ },
+ {
+ "epoch": 1.6617543859649122,
+ "grad_norm": 1.0776817798614502,
+ "learning_rate": 3.759221890514681e-05,
+ "loss": 0.5236,
+ "step": 11840
+ },
+ {
+ "epoch": 1.663157894736842,
+ "grad_norm": 1.6151456832885742,
+ "learning_rate": 3.757315786633465e-05,
+ "loss": 0.4783,
+ "step": 11850
+ },
+ {
+ "epoch": 1.664561403508772,
+ "grad_norm": 2.149061679840088,
+ "learning_rate": 3.7554087038888155e-05,
+ "loss": 0.6304,
+ "step": 11860
+ },
+ {
+ "epoch": 1.6659649122807019,
+ "grad_norm": 1.848923683166504,
+ "learning_rate": 3.753500643765461e-05,
+ "loss": 0.4951,
+ "step": 11870
+ },
+ {
+ "epoch": 1.6673684210526316,
+ "grad_norm": 1.3279706239700317,
+ "learning_rate": 3.751591607748891e-05,
+ "loss": 0.5195,
+ "step": 11880
+ },
+ {
+ "epoch": 1.6687719298245614,
+ "grad_norm": 2.280778646469116,
+ "learning_rate": 3.749681597325357e-05,
+ "loss": 0.6116,
+ "step": 11890
+ },
+ {
+ "epoch": 1.670175438596491,
+ "grad_norm": 1.8069521188735962,
+ "learning_rate": 3.7477706139818683e-05,
+ "loss": 0.5038,
+ "step": 11900
+ },
+ {
+ "epoch": 1.671578947368421,
+ "grad_norm": 1.8922659158706665,
+ "learning_rate": 3.745858659206188e-05,
+ "loss": 0.5671,
+ "step": 11910
+ },
+ {
+ "epoch": 1.672982456140351,
+ "grad_norm": 1.6917041540145874,
+ "learning_rate": 3.743945734486841e-05,
+ "loss": 0.5559,
+ "step": 11920
+ },
+ {
+ "epoch": 1.6743859649122808,
+ "grad_norm": 1.2357120513916016,
+ "learning_rate": 3.742031841313103e-05,
+ "loss": 0.5069,
+ "step": 11930
+ },
+ {
+ "epoch": 1.6757894736842105,
+ "grad_norm": 1.5987924337387085,
+ "learning_rate": 3.7401169811750066e-05,
+ "loss": 0.5431,
+ "step": 11940
+ },
+ {
+ "epoch": 1.6771929824561402,
+ "grad_norm": 1.3475733995437622,
+ "learning_rate": 3.7382011555633365e-05,
+ "loss": 0.5636,
+ "step": 11950
+ },
+ {
+ "epoch": 1.6785964912280702,
+ "grad_norm": 1.4985164403915405,
+ "learning_rate": 3.736284365969627e-05,
+ "loss": 0.4871,
+ "step": 11960
+ },
+ {
+ "epoch": 1.6800000000000002,
+ "grad_norm": 1.2310782670974731,
+ "learning_rate": 3.7343666138861646e-05,
+ "loss": 0.6245,
+ "step": 11970
+ },
+ {
+ "epoch": 1.68140350877193,
+ "grad_norm": 1.7251943349838257,
+ "learning_rate": 3.7324479008059865e-05,
+ "loss": 0.5126,
+ "step": 11980
+ },
+ {
+ "epoch": 1.6828070175438596,
+ "grad_norm": 2.389265775680542,
+ "learning_rate": 3.7305282282228756e-05,
+ "loss": 0.5669,
+ "step": 11990
+ },
+ {
+ "epoch": 1.6842105263157894,
+ "grad_norm": 2.130988359451294,
+ "learning_rate": 3.728607597631363e-05,
+ "loss": 0.4796,
+ "step": 12000
+ },
+ {
+ "epoch": 1.6842105263157894,
+ "eval_loss": 0.6500447392463684,
+ "eval_runtime": 119.5925,
+ "eval_samples_per_second": 12.543,
+ "eval_steps_per_second": 3.136,
+ "step": 12000
+ },
+ {
+ "epoch": 1.6842105263157894,
+ "step": 12000,
+ "total_flos": 6.933368738955264e+17,
+ "train_loss": 0.6038338423768679,
+ "train_runtime": 5861.7175,
+ "train_samples_per_second": 24.31,
+ "train_steps_per_second": 6.078
+ }
+ ],
+ "logging_steps": 10,
+ "max_steps": 35625,
+ "num_input_tokens_seen": 0,
+ "num_train_epochs": 5,
+ "save_steps": 2000,
+ "total_flos": 6.933368738955264e+17,
+ "train_batch_size": 4,
+ "trial_name": null,
+ "trial_params": null
+}
diff --git a/llama2_13b_peft/news_commentary_it/training_args.bin b/llama2_13b_peft/news_commentary_it/training_args.bin
new file mode 100644
index 0000000000000000000000000000000000000000..af167c750acab3aff76c7f4ec3428ac3da073b63
--- /dev/null
+++ b/llama2_13b_peft/news_commentary_it/training_args.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c760325c4e915ed6add2ca1fa0f2456628f1a65e9e53ca6ae7e92088e8ec81d2
+size 5176
diff --git a/llama2_13b_peft/news_commentary_it/training_eval_loss.png b/llama2_13b_peft/news_commentary_it/training_eval_loss.png
new file mode 100644
index 0000000000000000000000000000000000000000..a7edaa057756a37411e06f8d00aad99df3881f20
Binary files /dev/null and b/llama2_13b_peft/news_commentary_it/training_eval_loss.png differ
diff --git a/llama2_13b_peft/news_commentary_it/training_loss.png b/llama2_13b_peft/news_commentary_it/training_loss.png
new file mode 100644
index 0000000000000000000000000000000000000000..0cd71870d015d9281f5fc6e0b90202427f93b881
Binary files /dev/null and b/llama2_13b_peft/news_commentary_it/training_loss.png differ
diff --git a/llama2_13b_peft/topical_chat/README.md b/llama2_13b_peft/topical_chat/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..d608fba36e56b9e62512321a94b466087309dd33
--- /dev/null
+++ b/llama2_13b_peft/topical_chat/README.md
@@ -0,0 +1,89 @@
+---
+license: other
+library_name: peft
+tags:
+- llama-factory
+- lora
+- generated_from_trainer
+base_model: /data1/model/llama2/meta-llama/Llama2-13b
+model-index:
+- name: topical_chat_no_sys
+ results: []
+---
+
+
+
+# topical_chat_no_sys
+
+This model is a fine-tuned version of the Llama2-13b base checkpoint (loaded from the local path `/data1/model/llama2/meta-llama/Llama2-13b`) on the topical_chat_no_sys dataset.
+It achieves the following results on the evaluation set:
+- Loss: 1.8941
+
+## Model description
+
+More information needed
+
+## Intended uses & limitations
+
+More information needed
+
+## Training and evaluation data
+
+More information needed
+
+## Training procedure
+
+### Training hyperparameters
+
+The following hyperparameters were used during training:
+- learning_rate: 5e-05
+- train_batch_size: 4
+- eval_batch_size: 4
+- seed: 42
+- distributed_type: multi-GPU
+- num_devices: 2
+- total_train_batch_size: 8
+- total_eval_batch_size: 8
+- optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
+- lr_scheduler_type: cosine
+- lr_scheduler_warmup_steps: 20
+- num_epochs: 5.0
+
+### Training results
+
+| Training Loss | Epoch | Step | Validation Loss |
+|:-------------:|:------:|:----:|:---------------:|
+| 2.1904 | 0.0472 | 100 | 2.1137 |
+| 1.9627 | 0.0944 | 200 | 2.0589 |
+| 2.0172 | 0.1416 | 300 | 2.0221 |
+| 1.8965 | 0.1889 | 400 | 1.9968 |
+| 1.9534 | 0.2361 | 500 | 1.9823 |
+| 1.8621 | 0.2833 | 600 | 1.9679 |
+| 1.9777 | 0.3305 | 700 | 1.9611 |
+| 2.0865 | 0.3777 | 800 | 1.9544 |
+| 1.9662 | 0.4249 | 900 | 1.9461 |
+| 1.8352 | 0.4721 | 1000 | 1.9376 |
+| 1.8973 | 0.5194 | 1100 | 1.9329 |
+| 1.9688 | 0.5666 | 1200 | 1.9264 |
+| 1.8383 | 0.6138 | 1300 | 1.9192 |
+| 1.9032 | 0.6610 | 1400 | 1.9146 |
+| 1.9295 | 0.7082 | 1500 | 1.9109 |
+| 1.8207 | 0.7554 | 1600 | 1.9061 |
+| 1.9119 | 0.8026 | 1700 | 1.9032 |
+| 1.8392 | 0.8499 | 1800 | 1.9019 |
+| 1.961 | 0.8971 | 1900 | 1.8994 |
+| 1.8913 | 0.9443 | 2000 | 1.8945 |
+| 1.8187 | 0.9915 | 2100 | 1.8941 |
+| 1.7296 | 1.0387 | 2200 | 1.9006 |
+| 1.6184 | 1.0859 | 2300 | 1.9040 |
+| 1.6973 | 1.1331 | 2400 | 1.9056 |
+
+
+### Framework versions
+
+- PEFT 0.10.0
+- Transformers 4.40.0
+- Pytorch 2.2.1
+- Datasets 2.18.0
+- Tokenizers 0.19.1
\ No newline at end of file
diff --git a/llama2_13b_peft/topical_chat/adapter_config.json b/llama2_13b_peft/topical_chat/adapter_config.json
new file mode 100644
index 0000000000000000000000000000000000000000..3993e35c744e2e9685fa9500a9626c4efa56cf55
--- /dev/null
+++ b/llama2_13b_peft/topical_chat/adapter_config.json
@@ -0,0 +1,34 @@
+{
+ "alpha_pattern": {},
+ "auto_mapping": null,
+ "base_model_name_or_path": "/data1/model/llama2/meta-llama/Llama2-13b",
+ "bias": "none",
+ "fan_in_fan_out": false,
+ "inference_mode": true,
+ "init_lora_weights": true,
+ "layer_replication": null,
+ "layers_pattern": null,
+ "layers_to_transform": null,
+ "loftq_config": {},
+ "lora_alpha": 16,
+ "lora_dropout": 0.0,
+ "megatron_config": null,
+ "megatron_core": "megatron.core",
+ "modules_to_save": null,
+ "peft_type": "LORA",
+ "r": 8,
+ "rank_pattern": {},
+ "revision": null,
+ "target_modules": [
+ "v_proj",
+ "down_proj",
+ "q_proj",
+ "gate_proj",
+ "up_proj",
+ "k_proj",
+ "o_proj"
+ ],
+ "task_type": "CAUSAL_LM",
+ "use_dora": false,
+ "use_rslora": false
+}
\ No newline at end of file
diff --git a/llama2_13b_peft/topical_chat/adapter_model.safetensors b/llama2_13b_peft/topical_chat/adapter_model.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..b125471004322f3572d04d9bb04472e0b4818880
--- /dev/null
+++ b/llama2_13b_peft/topical_chat/adapter_model.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:3e6e2ffd0565e16b426d000f5f8d1e22505be002104c4426830c2fe30127625f
+size 125248064
diff --git a/llama2_13b_peft/topical_chat/all_results.json b/llama2_13b_peft/topical_chat/all_results.json
new file mode 100644
index 0000000000000000000000000000000000000000..3d7332eb24985a5ddbd3d1f4ed34bf225ee8ac64
--- /dev/null
+++ b/llama2_13b_peft/topical_chat/all_results.json
@@ -0,0 +1,12 @@
+{
+ "epoch": 1.13314447592068,
+ "eval_loss": 1.8941270112991333,
+ "eval_runtime": 40.8751,
+ "eval_samples_per_second": 21.823,
+ "eval_steps_per_second": 2.74,
+ "total_flos": 9.512959383227597e+17,
+ "train_loss": 1.9100826263427735,
+ "train_runtime": 3885.2685,
+ "train_samples_per_second": 21.805,
+ "train_steps_per_second": 2.726
+}
\ No newline at end of file
diff --git a/llama2_13b_peft/topical_chat/eval_results.json b/llama2_13b_peft/topical_chat/eval_results.json
new file mode 100644
index 0000000000000000000000000000000000000000..24432d1fabce0bc2ff7d9840aee39adde74339c8
--- /dev/null
+++ b/llama2_13b_peft/topical_chat/eval_results.json
@@ -0,0 +1,7 @@
+{
+ "epoch": 1.13314447592068,
+ "eval_loss": 1.8941270112991333,
+ "eval_runtime": 40.8751,
+ "eval_samples_per_second": 21.823,
+ "eval_steps_per_second": 2.74
+}
\ No newline at end of file
diff --git a/llama2_13b_peft/topical_chat/special_tokens_map.json b/llama2_13b_peft/topical_chat/special_tokens_map.json
new file mode 100644
index 0000000000000000000000000000000000000000..72ecfeeb7e14d244c936169d2ed139eeae235ef1
--- /dev/null
+++ b/llama2_13b_peft/topical_chat/special_tokens_map.json
@@ -0,0 +1,24 @@
+{
+ "bos_token": {
+ "content": "<s>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false
+ },
+ "eos_token": {
+ "content": "</s>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false
+ },
+ "pad_token": "",
+ "unk_token": {
+ "content": "<unk>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false
+ }
+}
diff --git a/llama2_13b_peft/topical_chat/tokenizer.model b/llama2_13b_peft/topical_chat/tokenizer.model
new file mode 100644
index 0000000000000000000000000000000000000000..6c00c742ce03c627d6cd5b795984876fa49fa899
--- /dev/null
+++ b/llama2_13b_peft/topical_chat/tokenizer.model
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9e556afd44213b6bd1be2b850ebbbd98f5481437a8021afaf58ee7fb1818d347
+size 499723
diff --git a/llama2_13b_peft/topical_chat/tokenizer_config.json b/llama2_13b_peft/topical_chat/tokenizer_config.json
new file mode 100644
index 0000000000000000000000000000000000000000..a40266f39e5b5fed14de34710d35eb9e98d6bdad
--- /dev/null
+++ b/llama2_13b_peft/topical_chat/tokenizer_config.json
@@ -0,0 +1,45 @@
+{
+ "add_bos_token": true,
+ "add_eos_token": false,
+ "add_prefix_space": true,
+ "added_tokens_decoder": {
+ "0": {
+ "content": "<unk>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "1": {
+ "content": "<s>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "2": {
+ "content": "</s>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ }
+ },
+ "bos_token": "<s>",
+ "chat_template": "{% if messages[0]['role'] == 'system' %}{% set system_message = messages[0]['content'] %}{% endif %}{% if system_message is defined %}{{ system_message }}{% endif %}{% for message in messages %}{% set content = message['content'] %}{% if message['role'] == 'user' %}{{ content }}{% elif message['role'] == 'assistant' %}{{ content + '\\n' }}{% endif %}{% endfor %}",
+ "clean_up_tokenization_spaces": false,
+ "eos_token": "</s>",
+ "legacy": true,
+ "model_max_length": 1000000000000000019884624838656,
+ "pad_token": "",
+ "padding_side": "right",
+ "sp_model_kwargs": {},
+ "spaces_between_special_tokens": false,
+ "split_special_tokens": false,
+ "tokenizer_class": "LlamaTokenizer",
+ "unk_token": "<unk>",
+ "use_default_system_prompt": false
+}
diff --git a/llama2_13b_peft/topical_chat/train_results.json b/llama2_13b_peft/topical_chat/train_results.json
new file mode 100644
index 0000000000000000000000000000000000000000..ffed294e18da55058edb77a4d1f6e18d56ffd866
--- /dev/null
+++ b/llama2_13b_peft/topical_chat/train_results.json
@@ -0,0 +1,8 @@
+{
+ "epoch": 1.13314447592068,
+ "total_flos": 9.512959383227597e+17,
+ "train_loss": 1.9100826263427735,
+ "train_runtime": 3885.2685,
+ "train_samples_per_second": 21.805,
+ "train_steps_per_second": 2.726
+}
\ No newline at end of file
diff --git a/llama2_13b_peft/topical_chat/trainer_log.jsonl b/llama2_13b_peft/topical_chat/trainer_log.jsonl
new file mode 100644
index 0000000000000000000000000000000000000000..b4c7d7c3fa1fcc9079ad6d3359ae9b12dca8ec95
--- /dev/null
+++ b/llama2_13b_peft/topical_chat/trainer_log.jsonl
@@ -0,0 +1,266 @@
+{"current_steps": 10, "total_steps": 10590, "loss": 2.8563, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 2.5e-05, "epoch": 0.004721435316336166, "percentage": 0.09, "elapsed_time": "0:00:13", "remaining_time": "3:54:01"}
+{"current_steps": 20, "total_steps": 10590, "loss": 2.6853, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 5e-05, "epoch": 0.009442870632672332, "percentage": 0.19, "elapsed_time": "0:00:25", "remaining_time": "3:42:39"}
+{"current_steps": 30, "total_steps": 10590, "loss": 2.1764, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.999988957701981e-05, "epoch": 0.014164305949008499, "percentage": 0.28, "elapsed_time": "0:00:37", "remaining_time": "3:37:08"}
+{"current_steps": 40, "total_steps": 10590, "loss": 2.15, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.99995583090547e-05, "epoch": 0.018885741265344664, "percentage": 0.38, "elapsed_time": "0:00:49", "remaining_time": "3:35:35"}
+{"current_steps": 50, "total_steps": 10590, "loss": 2.1033, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.999900619903104e-05, "epoch": 0.023607176581680833, "percentage": 0.47, "elapsed_time": "0:01:00", "remaining_time": "3:32:38"}
+{"current_steps": 60, "total_steps": 10590, "loss": 2.1417, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.999823325182607e-05, "epoch": 0.028328611898016998, "percentage": 0.57, "elapsed_time": "0:01:11", "remaining_time": "3:29:28"}
+{"current_steps": 70, "total_steps": 10590, "loss": 2.1031, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.9997239474267886e-05, "epoch": 0.033050047214353166, "percentage": 0.66, "elapsed_time": "0:01:22", "remaining_time": "3:26:19"}
+{"current_steps": 80, "total_steps": 10590, "loss": 1.9967, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.9996024875135365e-05, "epoch": 0.03777148253068933, "percentage": 0.76, "elapsed_time": "0:01:34", "remaining_time": "3:26:30"}
+{"current_steps": 90, "total_steps": 10590, "loss": 2.1262, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.999458946515808e-05, "epoch": 0.042492917847025496, "percentage": 0.85, "elapsed_time": "0:01:46", "remaining_time": "3:27:40"}
+{"current_steps": 100, "total_steps": 10590, "loss": 2.1904, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.9992933257016194e-05, "epoch": 0.047214353163361665, "percentage": 0.94, "elapsed_time": "0:01:58", "remaining_time": "3:27:57"}
+{"current_steps": 100, "total_steps": 10590, "loss": null, "eval_loss": 2.113694906234741, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": null, "epoch": 0.047214353163361665, "percentage": 0.94, "elapsed_time": "0:01:58", "remaining_time": "3:27:57"}
+{"current_steps": 110, "total_steps": 10590, "loss": 2.062, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.99910562653404e-05, "epoch": 0.05193578847969783, "percentage": 1.04, "elapsed_time": "0:02:52", "remaining_time": "4:34:02"}
+{"current_steps": 120, "total_steps": 10590, "loss": 1.9974, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.9988958506711735e-05, "epoch": 0.056657223796033995, "percentage": 1.13, "elapsed_time": "0:03:04", "remaining_time": "4:28:26"}
+{"current_steps": 130, "total_steps": 10590, "loss": 2.124, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.9986639999661454e-05, "epoch": 0.061378659112370164, "percentage": 1.23, "elapsed_time": "0:03:16", "remaining_time": "4:24:08"}
+{"current_steps": 140, "total_steps": 10590, "loss": 1.9903, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.998410076467088e-05, "epoch": 0.06610009442870633, "percentage": 1.32, "elapsed_time": "0:03:28", "remaining_time": "4:19:49"}
+{"current_steps": 150, "total_steps": 10590, "loss": 2.1054, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.9981340824171194e-05, "epoch": 0.0708215297450425, "percentage": 1.42, "elapsed_time": "0:03:41", "remaining_time": "4:16:35"}
+{"current_steps": 160, "total_steps": 10590, "loss": 2.0274, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.997836020254328e-05, "epoch": 0.07554296506137866, "percentage": 1.51, "elapsed_time": "0:03:52", "remaining_time": "4:12:35"}
+{"current_steps": 170, "total_steps": 10590, "loss": 2.1484, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.997515892611746e-05, "epoch": 0.08026440037771483, "percentage": 1.61, "elapsed_time": "0:04:05", "remaining_time": "4:10:40"}
+{"current_steps": 180, "total_steps": 10590, "loss": 1.9468, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.9971737023173286e-05, "epoch": 0.08498583569405099, "percentage": 1.7, "elapsed_time": "0:04:16", "remaining_time": "4:07:23"}
+{"current_steps": 190, "total_steps": 10590, "loss": 2.0637, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.996809452393931e-05, "epoch": 0.08970727101038715, "percentage": 1.79, "elapsed_time": "0:04:29", "remaining_time": "4:05:27"}
+{"current_steps": 200, "total_steps": 10590, "loss": 1.9627, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.996423146059277e-05, "epoch": 0.09442870632672333, "percentage": 1.89, "elapsed_time": "0:04:40", "remaining_time": "4:02:51"}
+{"current_steps": 200, "total_steps": 10590, "loss": null, "eval_loss": 2.058908462524414, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": null, "epoch": 0.09442870632672333, "percentage": 1.89, "elapsed_time": "0:04:40", "remaining_time": "4:02:51"}
+{"current_steps": 210, "total_steps": 10590, "loss": 1.9765, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.996014786725935e-05, "epoch": 0.09915014164305949, "percentage": 1.98, "elapsed_time": "0:05:35", "remaining_time": "4:36:00"}
+{"current_steps": 220, "total_steps": 10590, "loss": 2.0651, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.9955843780012846e-05, "epoch": 0.10387157695939565, "percentage": 2.08, "elapsed_time": "0:05:45", "remaining_time": "4:31:31"}
+{"current_steps": 230, "total_steps": 10590, "loss": 1.9729, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.995131923687488e-05, "epoch": 0.10859301227573183, "percentage": 2.17, "elapsed_time": "0:05:56", "remaining_time": "4:27:34"}
+{"current_steps": 240, "total_steps": 10590, "loss": 2.0653, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.994657427781453e-05, "epoch": 0.11331444759206799, "percentage": 2.27, "elapsed_time": "0:06:08", "remaining_time": "4:25:01"}
+{"current_steps": 250, "total_steps": 10590, "loss": 2.185, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.994160894474799e-05, "epoch": 0.11803588290840415, "percentage": 2.36, "elapsed_time": "0:06:21", "remaining_time": "4:22:49"}
+{"current_steps": 260, "total_steps": 10590, "loss": 2.0668, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.993642328153822e-05, "epoch": 0.12275731822474033, "percentage": 2.46, "elapsed_time": "0:06:33", "remaining_time": "4:20:17"}
+{"current_steps": 270, "total_steps": 10590, "loss": 2.0643, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.993101733399453e-05, "epoch": 0.1274787535410765, "percentage": 2.55, "elapsed_time": "0:06:44", "remaining_time": "4:17:34"}
+{"current_steps": 280, "total_steps": 10590, "loss": 2.0303, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.9925391149872184e-05, "epoch": 0.13220018885741266, "percentage": 2.64, "elapsed_time": "0:06:56", "remaining_time": "4:15:18"}
+{"current_steps": 290, "total_steps": 10590, "loss": 1.9509, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.9919544778871985e-05, "epoch": 0.1369216241737488, "percentage": 2.74, "elapsed_time": "0:07:07", "remaining_time": "4:13:21"}
+{"current_steps": 300, "total_steps": 10590, "loss": 2.0172, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.991347827263982e-05, "epoch": 0.141643059490085, "percentage": 2.83, "elapsed_time": "0:07:19", "remaining_time": "4:11:22"}
+{"current_steps": 300, "total_steps": 10590, "loss": null, "eval_loss": 2.0220935344696045, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": null, "epoch": 0.141643059490085, "percentage": 2.83, "elapsed_time": "0:07:19", "remaining_time": "4:11:22"}
+{"current_steps": 310, "total_steps": 10590, "loss": 1.9862, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.990719168476625e-05, "epoch": 0.14636449480642116, "percentage": 2.93, "elapsed_time": "0:08:13", "remaining_time": "4:32:50"}
+{"current_steps": 320, "total_steps": 10590, "loss": 1.9686, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.990068507078595e-05, "epoch": 0.1510859301227573, "percentage": 3.02, "elapsed_time": "0:08:25", "remaining_time": "4:30:13"}
+{"current_steps": 330, "total_steps": 10590, "loss": 1.9751, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.989395848817731e-05, "epoch": 0.1558073654390935, "percentage": 3.12, "elapsed_time": "0:08:38", "remaining_time": "4:28:26"}
+{"current_steps": 340, "total_steps": 10590, "loss": 1.9844, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.988701199636186e-05, "epoch": 0.16052880075542966, "percentage": 3.21, "elapsed_time": "0:08:50", "remaining_time": "4:26:18"}
+{"current_steps": 350, "total_steps": 10590, "loss": 2.1042, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.987984565670382e-05, "epoch": 0.1652502360717658, "percentage": 3.31, "elapsed_time": "0:09:02", "remaining_time": "4:24:33"}
+{"current_steps": 360, "total_steps": 10590, "loss": 1.9839, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.9872459532509436e-05, "epoch": 0.16997167138810199, "percentage": 3.4, "elapsed_time": "0:09:14", "remaining_time": "4:22:46"}
+{"current_steps": 370, "total_steps": 10590, "loss": 1.9492, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.9864853689026556e-05, "epoch": 0.17469310670443816, "percentage": 3.49, "elapsed_time": "0:09:26", "remaining_time": "4:20:52"}
+{"current_steps": 380, "total_steps": 10590, "loss": 1.9942, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.985702819344397e-05, "epoch": 0.1794145420207743, "percentage": 3.59, "elapsed_time": "0:09:38", "remaining_time": "4:18:53"}
+{"current_steps": 390, "total_steps": 10590, "loss": 1.9091, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.984898311489085e-05, "epoch": 0.18413597733711048, "percentage": 3.68, "elapsed_time": "0:09:50", "remaining_time": "4:17:26"}
+{"current_steps": 400, "total_steps": 10590, "loss": 1.8965, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.98407185244361e-05, "epoch": 0.18885741265344666, "percentage": 3.78, "elapsed_time": "0:10:02", "remaining_time": "4:15:44"}
+{"current_steps": 400, "total_steps": 10590, "loss": null, "eval_loss": 1.9968496561050415, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": null, "epoch": 0.18885741265344666, "percentage": 3.78, "elapsed_time": "0:10:02", "remaining_time": "4:15:44"}
+{"current_steps": 410, "total_steps": 10590, "loss": 1.8476, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.983223449508779e-05, "epoch": 0.1935788479697828, "percentage": 3.87, "elapsed_time": "0:10:56", "remaining_time": "4:31:34"}
+{"current_steps": 420, "total_steps": 10590, "loss": 1.9005, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.982353110179246e-05, "epoch": 0.19830028328611898, "percentage": 3.97, "elapsed_time": "0:11:08", "remaining_time": "4:29:42"}
+{"current_steps": 430, "total_steps": 10590, "loss": 1.9098, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.9814608421434485e-05, "epoch": 0.20302171860245516, "percentage": 4.06, "elapsed_time": "0:11:20", "remaining_time": "4:28:09"}
+{"current_steps": 440, "total_steps": 10590, "loss": 1.912, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.9805466532835376e-05, "epoch": 0.2077431539187913, "percentage": 4.15, "elapsed_time": "0:11:33", "remaining_time": "4:26:40"}
+{"current_steps": 450, "total_steps": 10590, "loss": 2.044, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.979610551675311e-05, "epoch": 0.21246458923512748, "percentage": 4.25, "elapsed_time": "0:11:45", "remaining_time": "4:24:56"}
+{"current_steps": 460, "total_steps": 10590, "loss": 2.0439, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.978652545588137e-05, "epoch": 0.21718602455146366, "percentage": 4.34, "elapsed_time": "0:11:58", "remaining_time": "4:23:31"}
+{"current_steps": 470, "total_steps": 10590, "loss": 2.017, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.977672643484889e-05, "epoch": 0.2219074598677998, "percentage": 4.44, "elapsed_time": "0:12:09", "remaining_time": "4:21:48"}
+{"current_steps": 480, "total_steps": 10590, "loss": 1.9652, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.976670854021862e-05, "epoch": 0.22662889518413598, "percentage": 4.53, "elapsed_time": "0:12:20", "remaining_time": "4:20:04"}
+{"current_steps": 490, "total_steps": 10590, "loss": 1.9621, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.975647186048703e-05, "epoch": 0.23135033050047216, "percentage": 4.63, "elapsed_time": "0:12:32", "remaining_time": "4:18:28"}
+{"current_steps": 500, "total_steps": 10590, "loss": 1.9534, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.97460164860833e-05, "epoch": 0.2360717658168083, "percentage": 4.72, "elapsed_time": "0:12:44", "remaining_time": "4:17:03"}
+{"current_steps": 500, "total_steps": 10590, "loss": null, "eval_loss": 1.982257604598999, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": null, "epoch": 0.2360717658168083, "percentage": 4.72, "elapsed_time": "0:12:44", "remaining_time": "4:17:03"}
+{"current_steps": 510, "total_steps": 10590, "loss": 1.9625, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.973534250936851e-05, "epoch": 0.24079320113314448, "percentage": 4.82, "elapsed_time": "0:13:36", "remaining_time": "4:28:58"}
+{"current_steps": 520, "total_steps": 10590, "loss": 1.9728, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.972445002463485e-05, "epoch": 0.24551463644948066, "percentage": 4.91, "elapsed_time": "0:13:49", "remaining_time": "4:27:40"}
+{"current_steps": 530, "total_steps": 10590, "loss": 1.9505, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.971333912810476e-05, "epoch": 0.2502360717658168, "percentage": 5.0, "elapsed_time": "0:13:59", "remaining_time": "4:25:32"}
+{"current_steps": 540, "total_steps": 10590, "loss": 1.9735, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.970200991793012e-05, "epoch": 0.254957507082153, "percentage": 5.1, "elapsed_time": "0:14:11", "remaining_time": "4:24:04"}
+{"current_steps": 550, "total_steps": 10590, "loss": 1.9265, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.969046249419133e-05, "epoch": 0.25967894239848915, "percentage": 5.19, "elapsed_time": "0:14:23", "remaining_time": "4:22:38"}
+{"current_steps": 560, "total_steps": 10590, "loss": 1.9292, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.967869695889647e-05, "epoch": 0.26440037771482533, "percentage": 5.29, "elapsed_time": "0:14:35", "remaining_time": "4:21:13"}
+{"current_steps": 570, "total_steps": 10590, "loss": 1.9518, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.966671341598037e-05, "epoch": 0.26912181303116145, "percentage": 5.38, "elapsed_time": "0:14:47", "remaining_time": "4:19:54"}
+{"current_steps": 580, "total_steps": 10590, "loss": 1.9796, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.965451197130373e-05, "epoch": 0.2738432483474976, "percentage": 5.48, "elapsed_time": "0:14:58", "remaining_time": "4:18:33"}
+{"current_steps": 590, "total_steps": 10590, "loss": 1.9494, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.964209273265212e-05, "epoch": 0.2785646836638338, "percentage": 5.57, "elapsed_time": "0:15:10", "remaining_time": "4:17:06"}
+{"current_steps": 600, "total_steps": 10590, "loss": 1.8621, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.9629455809735105e-05, "epoch": 0.28328611898017, "percentage": 5.67, "elapsed_time": "0:15:21", "remaining_time": "4:15:50"}
+{"current_steps": 600, "total_steps": 10590, "loss": null, "eval_loss": 1.9678794145584106, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": null, "epoch": 0.28328611898017, "percentage": 5.67, "elapsed_time": "0:15:21", "remaining_time": "4:15:50"}
+{"current_steps": 610, "total_steps": 10590, "loss": 1.8698, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.9616601314185206e-05, "epoch": 0.28800755429650615, "percentage": 5.76, "elapsed_time": "0:16:16", "remaining_time": "4:26:11"}
+{"current_steps": 620, "total_steps": 10590, "loss": 1.9089, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.9603529359556975e-05, "epoch": 0.2927289896128423, "percentage": 5.85, "elapsed_time": "0:16:27", "remaining_time": "4:24:44"}
+{"current_steps": 630, "total_steps": 10590, "loss": 2.026, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.959024006132593e-05, "epoch": 0.29745042492917845, "percentage": 5.95, "elapsed_time": "0:16:39", "remaining_time": "4:23:21"}
+{"current_steps": 640, "total_steps": 10590, "loss": 1.9059, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.95767335368876e-05, "epoch": 0.3021718602455146, "percentage": 6.04, "elapsed_time": "0:16:51", "remaining_time": "4:22:09"}
+{"current_steps": 650, "total_steps": 10590, "loss": 1.973, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.956300990555643e-05, "epoch": 0.3068932955618508, "percentage": 6.14, "elapsed_time": "0:17:03", "remaining_time": "4:20:53"}
+{"current_steps": 660, "total_steps": 10590, "loss": 1.9296, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.954906928856476e-05, "epoch": 0.311614730878187, "percentage": 6.23, "elapsed_time": "0:17:16", "remaining_time": "4:19:59"}
+{"current_steps": 670, "total_steps": 10590, "loss": 1.9321, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.953491180906175e-05, "epoch": 0.31633616619452315, "percentage": 6.33, "elapsed_time": "0:17:28", "remaining_time": "4:18:43"}
+{"current_steps": 680, "total_steps": 10590, "loss": 2.0338, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.952053759211229e-05, "epoch": 0.3210576015108593, "percentage": 6.42, "elapsed_time": "0:17:40", "remaining_time": "4:17:42"}
+{"current_steps": 690, "total_steps": 10590, "loss": 1.9328, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.9505946764695885e-05, "epoch": 0.32577903682719545, "percentage": 6.52, "elapsed_time": "0:17:53", "remaining_time": "4:16:37"}
+{"current_steps": 700, "total_steps": 10590, "loss": 1.9777, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.949113945570555e-05, "epoch": 0.3305004721435316, "percentage": 6.61, "elapsed_time": "0:18:04", "remaining_time": "4:15:15"}
+{"current_steps": 700, "total_steps": 10590, "loss": null, "eval_loss": 1.9610685110092163, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": null, "epoch": 0.3305004721435316, "percentage": 6.61, "elapsed_time": "0:18:04", "remaining_time": "4:15:15"}
+{"current_steps": 710, "total_steps": 10590, "loss": 2.0113, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.947611579594666e-05, "epoch": 0.3352219074598678, "percentage": 6.7, "elapsed_time": "0:18:58", "remaining_time": "4:24:04"}
+{"current_steps": 720, "total_steps": 10590, "loss": 1.9097, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.9460875918135804e-05, "epoch": 0.33994334277620397, "percentage": 6.8, "elapsed_time": "0:19:10", "remaining_time": "4:22:50"}
+{"current_steps": 730, "total_steps": 10590, "loss": 1.9226, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.944541995689958e-05, "epoch": 0.34466477809254015, "percentage": 6.89, "elapsed_time": "0:19:22", "remaining_time": "4:21:45"}
+{"current_steps": 740, "total_steps": 10590, "loss": 1.9583, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.9429748048773475e-05, "epoch": 0.3493862134088763, "percentage": 6.99, "elapsed_time": "0:19:35", "remaining_time": "4:20:52"}
+{"current_steps": 750, "total_steps": 10590, "loss": 1.9101, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.941386033220058e-05, "epoch": 0.35410764872521244, "percentage": 7.08, "elapsed_time": "0:19:47", "remaining_time": "4:19:39"}
+{"current_steps": 760, "total_steps": 10590, "loss": 1.9238, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.9397756947530414e-05, "epoch": 0.3588290840415486, "percentage": 7.18, "elapsed_time": "0:20:00", "remaining_time": "4:18:52"}
+{"current_steps": 770, "total_steps": 10590, "loss": 1.867, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.938143803701769e-05, "epoch": 0.3635505193578848, "percentage": 7.27, "elapsed_time": "0:20:11", "remaining_time": "4:17:36"}
+{"current_steps": 780, "total_steps": 10590, "loss": 1.9233, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.9364903744821014e-05, "epoch": 0.36827195467422097, "percentage": 7.37, "elapsed_time": "0:20:23", "remaining_time": "4:16:30"}
+{"current_steps": 790, "total_steps": 10590, "loss": 1.8599, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.934815421700165e-05, "epoch": 0.37299338999055714, "percentage": 7.46, "elapsed_time": "0:20:35", "remaining_time": "4:15:22"}
+{"current_steps": 800, "total_steps": 10590, "loss": 2.0865, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.933118960152222e-05, "epoch": 0.3777148253068933, "percentage": 7.55, "elapsed_time": "0:20:47", "remaining_time": "4:14:26"}
+{"current_steps": 800, "total_steps": 10590, "loss": null, "eval_loss": 1.954448938369751, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": null, "epoch": 0.3777148253068933, "percentage": 7.55, "elapsed_time": "0:20:47", "remaining_time": "4:14:26"}
+{"current_steps": 810, "total_steps": 10590, "loss": 1.9036, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.931401004824541e-05, "epoch": 0.38243626062322944, "percentage": 7.65, "elapsed_time": "0:21:40", "remaining_time": "4:21:46"}
+{"current_steps": 820, "total_steps": 10590, "loss": 1.92, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.92966157089326e-05, "epoch": 0.3871576959395656, "percentage": 7.74, "elapsed_time": "0:21:51", "remaining_time": "4:20:23"}
+{"current_steps": 830, "total_steps": 10590, "loss": 1.9894, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.927900673724259e-05, "epoch": 0.3918791312559018, "percentage": 7.84, "elapsed_time": "0:22:04", "remaining_time": "4:19:29"}
+{"current_steps": 840, "total_steps": 10590, "loss": 1.9978, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.9261183288730176e-05, "epoch": 0.39660056657223797, "percentage": 7.93, "elapsed_time": "0:22:15", "remaining_time": "4:18:26"}
+{"current_steps": 850, "total_steps": 10590, "loss": 1.8262, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.9243145520844834e-05, "epoch": 0.40132200188857414, "percentage": 8.03, "elapsed_time": "0:22:27", "remaining_time": "4:17:20"}
+{"current_steps": 860, "total_steps": 10590, "loss": 1.907, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.9224893592929275e-05, "epoch": 0.4060434372049103, "percentage": 8.12, "elapsed_time": "0:22:39", "remaining_time": "4:16:24"}
+{"current_steps": 870, "total_steps": 10590, "loss": 1.9766, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.92064276662181e-05, "epoch": 0.41076487252124644, "percentage": 8.22, "elapsed_time": "0:22:52", "remaining_time": "4:15:30"}
+{"current_steps": 880, "total_steps": 10590, "loss": 1.9038, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.9187747903836303e-05, "epoch": 0.4154863078375826, "percentage": 8.31, "elapsed_time": "0:23:04", "remaining_time": "4:14:37"}
+{"current_steps": 890, "total_steps": 10590, "loss": 1.9733, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.9168854470797904e-05, "epoch": 0.4202077431539188, "percentage": 8.4, "elapsed_time": "0:23:16", "remaining_time": "4:13:45"}
+{"current_steps": 900, "total_steps": 10590, "loss": 1.9662, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.914974753400443e-05, "epoch": 0.42492917847025496, "percentage": 8.5, "elapsed_time": "0:23:28", "remaining_time": "4:12:44"}
+{"current_steps": 900, "total_steps": 10590, "loss": null, "eval_loss": 1.946061372756958, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": null, "epoch": 0.42492917847025496, "percentage": 8.5, "elapsed_time": "0:23:28", "remaining_time": "4:12:44"}
+{"current_steps": 910, "total_steps": 10590, "loss": 2.0209, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.913042726224347e-05, "epoch": 0.42965061378659114, "percentage": 8.59, "elapsed_time": "0:24:23", "remaining_time": "4:19:27"}
+{"current_steps": 920, "total_steps": 10590, "loss": 1.8343, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.911089382618718e-05, "epoch": 0.4343720491029273, "percentage": 8.69, "elapsed_time": "0:24:36", "remaining_time": "4:18:34"}
+{"current_steps": 930, "total_steps": 10590, "loss": 1.952, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.909114739839079e-05, "epoch": 0.43909348441926344, "percentage": 8.78, "elapsed_time": "0:24:48", "remaining_time": "4:17:42"}
+{"current_steps": 940, "total_steps": 10590, "loss": 1.9064, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.907118815329104e-05, "epoch": 0.4438149197355996, "percentage": 8.88, "elapsed_time": "0:25:00", "remaining_time": "4:16:48"}
+{"current_steps": 950, "total_steps": 10590, "loss": 1.883, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.905101626720469e-05, "epoch": 0.4485363550519358, "percentage": 8.97, "elapsed_time": "0:25:12", "remaining_time": "4:15:48"}
+{"current_steps": 960, "total_steps": 10590, "loss": 1.8716, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.903063191832691e-05, "epoch": 0.45325779036827196, "percentage": 9.07, "elapsed_time": "0:25:26", "remaining_time": "4:15:15"}
+{"current_steps": 970, "total_steps": 10590, "loss": 1.9534, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.901003528672975e-05, "epoch": 0.45797922568460814, "percentage": 9.16, "elapsed_time": "0:25:40", "remaining_time": "4:14:33"}
+{"current_steps": 980, "total_steps": 10590, "loss": 1.8552, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.898922655436052e-05, "epoch": 0.4627006610009443, "percentage": 9.25, "elapsed_time": "0:25:50", "remaining_time": "4:13:20"}
+{"current_steps": 990, "total_steps": 10590, "loss": 1.974, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.8968205905040207e-05, "epoch": 0.46742209631728043, "percentage": 9.35, "elapsed_time": "0:26:02", "remaining_time": "4:12:29"}
+{"current_steps": 1000, "total_steps": 10590, "loss": 1.8352, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.894697352446182e-05, "epoch": 0.4721435316336166, "percentage": 9.44, "elapsed_time": "0:26:14", "remaining_time": "4:11:36"}
+{"current_steps": 1000, "total_steps": 10590, "loss": null, "eval_loss": 1.9375569820404053, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": null, "epoch": 0.4721435316336166, "percentage": 9.44, "elapsed_time": "0:26:14", "remaining_time": "4:11:36"}
+{"current_steps": 1010, "total_steps": 10590, "loss": 1.9139, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.8925529600188794e-05, "epoch": 0.4768649669499528, "percentage": 9.54, "elapsed_time": "0:27:08", "remaining_time": "4:17:27"}
+{"current_steps": 1020, "total_steps": 10590, "loss": 1.8118, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.8903874321653274e-05, "epoch": 0.48158640226628896, "percentage": 9.63, "elapsed_time": "0:27:19", "remaining_time": "4:16:23"}
+{"current_steps": 1030, "total_steps": 10590, "loss": 1.9924, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.88820078801545e-05, "epoch": 0.48630783758262514, "percentage": 9.73, "elapsed_time": "0:27:31", "remaining_time": "4:15:25"}
+{"current_steps": 1040, "total_steps": 10590, "loss": 1.8814, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.885993046885708e-05, "epoch": 0.4910292728989613, "percentage": 9.82, "elapsed_time": "0:27:43", "remaining_time": "4:14:38"}
+{"current_steps": 1050, "total_steps": 10590, "loss": 1.8928, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.883764228278931e-05, "epoch": 0.49575070821529743, "percentage": 9.92, "elapsed_time": "0:27:55", "remaining_time": "4:13:46"}
+{"current_steps": 1060, "total_steps": 10590, "loss": 1.9071, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.881514351884141e-05, "epoch": 0.5004721435316336, "percentage": 10.01, "elapsed_time": "0:28:08", "remaining_time": "4:13:00"}
+{"current_steps": 1070, "total_steps": 10590, "loss": 1.939, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.879243437576383e-05, "epoch": 0.5051935788479698, "percentage": 10.1, "elapsed_time": "0:28:21", "remaining_time": "4:12:18"}
+{"current_steps": 1080, "total_steps": 10590, "loss": 1.8603, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.876951505416547e-05, "epoch": 0.509915014164306, "percentage": 10.2, "elapsed_time": "0:28:32", "remaining_time": "4:11:20"}
+{"current_steps": 1090, "total_steps": 10590, "loss": 1.86, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.8746385756511915e-05, "epoch": 0.5146364494806421, "percentage": 10.29, "elapsed_time": "0:28:44", "remaining_time": "4:10:29"}
+{"current_steps": 1100, "total_steps": 10590, "loss": 1.8973, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.872304668712364e-05, "epoch": 0.5193578847969783, "percentage": 10.39, "elapsed_time": "0:28:55", "remaining_time": "4:09:32"}
+{"current_steps": 1100, "total_steps": 10590, "loss": null, "eval_loss": 1.932855248451233, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": null, "epoch": 0.5193578847969783, "percentage": 10.39, "elapsed_time": "0:28:55", "remaining_time": "4:09:32"}
+{"current_steps": 1110, "total_steps": 10590, "loss": 1.9125, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.8699498052174205e-05, "epoch": 0.5240793201133145, "percentage": 10.48, "elapsed_time": "0:29:49", "remaining_time": "4:14:44"}
+{"current_steps": 1120, "total_steps": 10590, "loss": 2.001, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.867574005968847e-05, "epoch": 0.5288007554296507, "percentage": 10.58, "elapsed_time": "0:30:02", "remaining_time": "4:14:04"}
+{"current_steps": 1130, "total_steps": 10590, "loss": 1.8442, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.8651772919540686e-05, "epoch": 0.5335221907459868, "percentage": 10.67, "elapsed_time": "0:30:16", "remaining_time": "4:13:26"}
+{"current_steps": 1140, "total_steps": 10590, "loss": 1.9932, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.862759684345269e-05, "epoch": 0.5382436260623229, "percentage": 10.76, "elapsed_time": "0:30:28", "remaining_time": "4:12:33"}
+{"current_steps": 1150, "total_steps": 10590, "loss": 1.8388, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.860321204499205e-05, "epoch": 0.5429650613786591, "percentage": 10.86, "elapsed_time": "0:30:39", "remaining_time": "4:11:43"}
+{"current_steps": 1160, "total_steps": 10590, "loss": 1.7975, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.857861873957011e-05, "epoch": 0.5476864966949953, "percentage": 10.95, "elapsed_time": "0:30:52", "remaining_time": "4:10:59"}
+{"current_steps": 1170, "total_steps": 10590, "loss": 1.848, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.855381714444018e-05, "epoch": 0.5524079320113314, "percentage": 11.05, "elapsed_time": "0:31:04", "remaining_time": "4:10:14"}
+{"current_steps": 1180, "total_steps": 10590, "loss": 1.9311, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.8528807478695535e-05, "epoch": 0.5571293673276676, "percentage": 11.14, "elapsed_time": "0:31:16", "remaining_time": "4:09:26"}
+{"current_steps": 1190, "total_steps": 10590, "loss": 1.8705, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.850358996326753e-05, "epoch": 0.5618508026440038, "percentage": 11.24, "elapsed_time": "0:31:28", "remaining_time": "4:08:38"}
+{"current_steps": 1200, "total_steps": 10590, "loss": 1.9688, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.84781648209236e-05, "epoch": 0.56657223796034, "percentage": 11.33, "elapsed_time": "0:31:41", "remaining_time": "4:07:56"}
+{"current_steps": 1200, "total_steps": 10590, "loss": null, "eval_loss": 1.9264005422592163, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": null, "epoch": 0.56657223796034, "percentage": 11.33, "elapsed_time": "0:31:41", "remaining_time": "4:07:56"}
+{"current_steps": 1210, "total_steps": 10590, "loss": 1.9038, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.8452532276265364e-05, "epoch": 0.5712936732766761, "percentage": 11.43, "elapsed_time": "0:32:33", "remaining_time": "4:12:25"}
+{"current_steps": 1220, "total_steps": 10590, "loss": 1.9581, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.842669255572656e-05, "epoch": 0.5760151085930123, "percentage": 11.52, "elapsed_time": "0:32:45", "remaining_time": "4:11:34"}
+{"current_steps": 1230, "total_steps": 10590, "loss": 1.8913, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.8400645887571126e-05, "epoch": 0.5807365439093485, "percentage": 11.61, "elapsed_time": "0:32:57", "remaining_time": "4:10:51"}
+{"current_steps": 1240, "total_steps": 10590, "loss": 1.8597, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.837439250189111e-05, "epoch": 0.5854579792256847, "percentage": 11.71, "elapsed_time": "0:33:10", "remaining_time": "4:10:08"}
+{"current_steps": 1250, "total_steps": 10590, "loss": 1.9072, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.834793263060468e-05, "epoch": 0.5901794145420207, "percentage": 11.8, "elapsed_time": "0:33:23", "remaining_time": "4:09:30"}
+{"current_steps": 1260, "total_steps": 10590, "loss": 1.9781, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.832126650745405e-05, "epoch": 0.5949008498583569, "percentage": 11.9, "elapsed_time": "0:33:35", "remaining_time": "4:08:47"}
+{"current_steps": 1270, "total_steps": 10590, "loss": 1.885, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.829439436800346e-05, "epoch": 0.5996222851746931, "percentage": 11.99, "elapsed_time": "0:33:46", "remaining_time": "4:07:53"}
+{"current_steps": 1280, "total_steps": 10590, "loss": 1.8891, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.8267316449637054e-05, "epoch": 0.6043437204910292, "percentage": 12.09, "elapsed_time": "0:33:59", "remaining_time": "4:07:12"}
+{"current_steps": 1290, "total_steps": 10590, "loss": 1.8654, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.8240032991556765e-05, "epoch": 0.6090651558073654, "percentage": 12.18, "elapsed_time": "0:34:10", "remaining_time": "4:06:21"}
+{"current_steps": 1300, "total_steps": 10590, "loss": 1.8383, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.821254423478027e-05, "epoch": 0.6137865911237016, "percentage": 12.28, "elapsed_time": "0:34:22", "remaining_time": "4:05:38"}
+{"current_steps": 1300, "total_steps": 10590, "loss": null, "eval_loss": 1.9191977977752686, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": null, "epoch": 0.6137865911237016, "percentage": 12.28, "elapsed_time": "0:34:22", "remaining_time": "4:05:38"}
+{"current_steps": 1310, "total_steps": 10590, "loss": 1.8538, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.8184850422138795e-05, "epoch": 0.6185080264400378, "percentage": 12.37, "elapsed_time": "0:35:15", "remaining_time": "4:09:44"}
+{"current_steps": 1320, "total_steps": 10590, "loss": 1.9403, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.815695179827502e-05, "epoch": 0.623229461756374, "percentage": 12.46, "elapsed_time": "0:35:27", "remaining_time": "4:08:58"}
+{"current_steps": 1330, "total_steps": 10590, "loss": 1.8925, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.812884860964086e-05, "epoch": 0.6279508970727101, "percentage": 12.56, "elapsed_time": "0:35:38", "remaining_time": "4:08:08"}
+{"current_steps": 1340, "total_steps": 10590, "loss": 1.8849, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.8100541104495355e-05, "epoch": 0.6326723323890463, "percentage": 12.65, "elapsed_time": "0:35:51", "remaining_time": "4:07:34"}
+{"current_steps": 1350, "total_steps": 10590, "loss": 1.8733, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.8072029532902426e-05, "epoch": 0.6373937677053825, "percentage": 12.75, "elapsed_time": "0:36:03", "remaining_time": "4:06:48"}
+{"current_steps": 1360, "total_steps": 10590, "loss": 1.9357, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.8043314146728705e-05, "epoch": 0.6421152030217187, "percentage": 12.84, "elapsed_time": "0:36:16", "remaining_time": "4:06:11"}
+{"current_steps": 1370, "total_steps": 10590, "loss": 1.7913, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.8014395199641246e-05, "epoch": 0.6468366383380547, "percentage": 12.94, "elapsed_time": "0:36:27", "remaining_time": "4:05:22"}
+{"current_steps": 1380, "total_steps": 10590, "loss": 1.9151, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.798527294710538e-05, "epoch": 0.6515580736543909, "percentage": 13.03, "elapsed_time": "0:36:40", "remaining_time": "4:04:42"}
+{"current_steps": 1390, "total_steps": 10590, "loss": 1.9297, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.795594764638237e-05, "epoch": 0.6562795089707271, "percentage": 13.13, "elapsed_time": "0:36:52", "remaining_time": "4:04:01"}
+{"current_steps": 1400, "total_steps": 10590, "loss": 1.9032, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.792641955652718e-05, "epoch": 0.6610009442870632, "percentage": 13.22, "elapsed_time": "0:37:04", "remaining_time": "4:03:24"}
+{"current_steps": 1400, "total_steps": 10590, "loss": null, "eval_loss": 1.914588212966919, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": null, "epoch": 0.6610009442870632, "percentage": 13.22, "elapsed_time": "0:37:04", "remaining_time": "4:03:24"}
+{"current_steps": 1410, "total_steps": 10590, "loss": 1.9032, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.7896688938386195e-05, "epoch": 0.6657223796033994, "percentage": 13.31, "elapsed_time": "0:37:59", "remaining_time": "4:07:22"}
+{"current_steps": 1420, "total_steps": 10590, "loss": 1.8854, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.786675605459487e-05, "epoch": 0.6704438149197356, "percentage": 13.41, "elapsed_time": "0:38:10", "remaining_time": "4:06:31"}
+{"current_steps": 1430, "total_steps": 10590, "loss": 1.8865, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.7836621169575494e-05, "epoch": 0.6751652502360718, "percentage": 13.5, "elapsed_time": "0:38:23", "remaining_time": "4:05:52"}
+{"current_steps": 1440, "total_steps": 10590, "loss": 1.7515, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.7806284549534755e-05, "epoch": 0.6798866855524079, "percentage": 13.6, "elapsed_time": "0:38:35", "remaining_time": "4:05:10"}
+{"current_steps": 1450, "total_steps": 10590, "loss": 1.9624, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.7775746462461446e-05, "epoch": 0.6846081208687441, "percentage": 13.69, "elapsed_time": "0:38:47", "remaining_time": "4:04:30"}
+{"current_steps": 1460, "total_steps": 10590, "loss": 1.7875, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.7745007178124114e-05, "epoch": 0.6893295561850803, "percentage": 13.79, "elapsed_time": "0:38:59", "remaining_time": "4:03:51"}
+{"current_steps": 1470, "total_steps": 10590, "loss": 1.8984, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.771406696806861e-05, "epoch": 0.6940509915014165, "percentage": 13.88, "elapsed_time": "0:39:11", "remaining_time": "4:03:08"}
+{"current_steps": 1480, "total_steps": 10590, "loss": 1.8594, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.7682926105615754e-05, "epoch": 0.6987724268177526, "percentage": 13.98, "elapsed_time": "0:39:23", "remaining_time": "4:02:26"}
+{"current_steps": 1490, "total_steps": 10590, "loss": 1.8484, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.76515848658589e-05, "epoch": 0.7034938621340887, "percentage": 14.07, "elapsed_time": "0:39:35", "remaining_time": "4:01:48"}
+{"current_steps": 1500, "total_steps": 10590, "loss": 1.9295, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.76200435256615e-05, "epoch": 0.7082152974504249, "percentage": 14.16, "elapsed_time": "0:39:47", "remaining_time": "4:01:05"}
+{"current_steps": 1500, "total_steps": 10590, "loss": null, "eval_loss": 1.9108749628067017, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": null, "epoch": 0.7082152974504249, "percentage": 14.16, "elapsed_time": "0:39:47", "remaining_time": "4:01:05"}
+{"current_steps": 1510, "total_steps": 10590, "loss": 1.8586, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.758830236365465e-05, "epoch": 0.7129367327667611, "percentage": 14.26, "elapsed_time": "0:40:40", "remaining_time": "4:04:36"}
+{"current_steps": 1520, "total_steps": 10590, "loss": 1.9794, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.7556361660234634e-05, "epoch": 0.7176581680830972, "percentage": 14.35, "elapsed_time": "0:40:52", "remaining_time": "4:03:55"}
+{"current_steps": 1530, "total_steps": 10590, "loss": 1.9122, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.752422169756048e-05, "epoch": 0.7223796033994334, "percentage": 14.45, "elapsed_time": "0:41:05", "remaining_time": "4:03:16"}
+{"current_steps": 1540, "total_steps": 10590, "loss": 1.9197, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.749188275955143e-05, "epoch": 0.7271010387157696, "percentage": 14.54, "elapsed_time": "0:41:16", "remaining_time": "4:02:33"}
+{"current_steps": 1550, "total_steps": 10590, "loss": 1.8548, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.745934513188442e-05, "epoch": 0.7318224740321058, "percentage": 14.64, "elapsed_time": "0:41:29", "remaining_time": "4:02:00"}
+{"current_steps": 1560, "total_steps": 10590, "loss": 1.8857, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.7426609101991605e-05, "epoch": 0.7365439093484419, "percentage": 14.73, "elapsed_time": "0:41:41", "remaining_time": "4:01:17"}
+{"current_steps": 1570, "total_steps": 10590, "loss": 1.876, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.739367495905778e-05, "epoch": 0.7412653446647781, "percentage": 14.83, "elapsed_time": "0:41:52", "remaining_time": "4:00:37"}
+{"current_steps": 1580, "total_steps": 10590, "loss": 1.9355, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.736054299401785e-05, "epoch": 0.7459867799811143, "percentage": 14.92, "elapsed_time": "0:42:05", "remaining_time": "3:59:59"}
+{"current_steps": 1590, "total_steps": 10590, "loss": 1.9286, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.7327213499554234e-05, "epoch": 0.7507082152974505, "percentage": 15.01, "elapsed_time": "0:42:16", "remaining_time": "3:59:16"}
+{"current_steps": 1600, "total_steps": 10590, "loss": 1.8207, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.7293686770094294e-05, "epoch": 0.7554296506137866, "percentage": 15.11, "elapsed_time": "0:42:27", "remaining_time": "3:58:32"}
+{"current_steps": 1600, "total_steps": 10590, "loss": null, "eval_loss": 1.906082034111023, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": null, "epoch": 0.7554296506137866, "percentage": 15.11, "elapsed_time": "0:42:27", "remaining_time": "3:58:32"}
+{"current_steps": 1610, "total_steps": 10590, "loss": 1.9245, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.725996310180776e-05, "epoch": 0.7601510859301227, "percentage": 15.2, "elapsed_time": "0:43:21", "remaining_time": "4:01:50"}
+{"current_steps": 1620, "total_steps": 10590, "loss": 1.8556, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.7226042792604046e-05, "epoch": 0.7648725212464589, "percentage": 15.3, "elapsed_time": "0:43:32", "remaining_time": "4:01:07"}
+{"current_steps": 1630, "total_steps": 10590, "loss": 1.9757, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.719192614212969e-05, "epoch": 0.7695939565627951, "percentage": 15.39, "elapsed_time": "0:43:44", "remaining_time": "4:00:28"}
+{"current_steps": 1640, "total_steps": 10590, "loss": 2.0371, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.7157613451765686e-05, "epoch": 0.7743153918791312, "percentage": 15.49, "elapsed_time": "0:43:56", "remaining_time": "3:59:48"}
+{"current_steps": 1650, "total_steps": 10590, "loss": 1.8646, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.7123105024624776e-05, "epoch": 0.7790368271954674, "percentage": 15.58, "elapsed_time": "0:44:08", "remaining_time": "3:59:09"}
+{"current_steps": 1660, "total_steps": 10590, "loss": 1.8383, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.708840116554883e-05, "epoch": 0.7837582625118036, "percentage": 15.68, "elapsed_time": "0:44:21", "remaining_time": "3:58:35"}
+{"current_steps": 1670, "total_steps": 10590, "loss": 1.9008, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.7053502181106145e-05, "epoch": 0.7884796978281398, "percentage": 15.77, "elapsed_time": "0:44:33", "remaining_time": "3:57:59"}
+{"current_steps": 1680, "total_steps": 10590, "loss": 1.8378, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.70184083795887e-05, "epoch": 0.7932011331444759, "percentage": 15.86, "elapsed_time": "0:44:44", "remaining_time": "3:57:18"}
+{"current_steps": 1690, "total_steps": 10590, "loss": 1.8468, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.698312007100947e-05, "epoch": 0.7979225684608121, "percentage": 15.96, "elapsed_time": "0:44:56", "remaining_time": "3:56:40"}
+{"current_steps": 1700, "total_steps": 10590, "loss": 1.9119, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.694763756709967e-05, "epoch": 0.8026440037771483, "percentage": 16.05, "elapsed_time": "0:45:07", "remaining_time": "3:55:57"}
+{"current_steps": 1700, "total_steps": 10590, "loss": null, "eval_loss": 1.9032281637191772, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": null, "epoch": 0.8026440037771483, "percentage": 16.05, "elapsed_time": "0:45:07", "remaining_time": "3:55:57"}
+{"current_steps": 1710, "total_steps": 10590, "loss": 1.9081, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.691196118130601e-05, "epoch": 0.8073654390934845, "percentage": 16.15, "elapsed_time": "0:45:59", "remaining_time": "3:58:52"}
+{"current_steps": 1720, "total_steps": 10590, "loss": 1.8604, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.687609122878791e-05, "epoch": 0.8120868744098206, "percentage": 16.24, "elapsed_time": "0:46:10", "remaining_time": "3:58:07"}
+{"current_steps": 1730, "total_steps": 10590, "loss": 1.7843, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.6840028026414745e-05, "epoch": 0.8168083097261567, "percentage": 16.34, "elapsed_time": "0:46:22", "remaining_time": "3:57:32"}
+{"current_steps": 1740, "total_steps": 10590, "loss": 1.8666, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.6803771892763004e-05, "epoch": 0.8215297450424929, "percentage": 16.43, "elapsed_time": "0:46:33", "remaining_time": "3:56:49"}
+{"current_steps": 1750, "total_steps": 10590, "loss": 1.8538, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.676732314811353e-05, "epoch": 0.826251180358829, "percentage": 16.53, "elapsed_time": "0:46:46", "remaining_time": "3:56:15"}
+{"current_steps": 1760, "total_steps": 10590, "loss": 1.8492, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.673068211444862e-05, "epoch": 0.8309726156751652, "percentage": 16.62, "elapsed_time": "0:46:57", "remaining_time": "3:55:35"}
+{"current_steps": 1770, "total_steps": 10590, "loss": 1.8554, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.669384911544927e-05, "epoch": 0.8356940509915014, "percentage": 16.71, "elapsed_time": "0:47:08", "remaining_time": "3:54:56"}
+{"current_steps": 1780, "total_steps": 10590, "loss": 1.9333, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.665682447649222e-05, "epoch": 0.8404154863078376, "percentage": 16.81, "elapsed_time": "0:47:21", "remaining_time": "3:54:24"}
+{"current_steps": 1790, "total_steps": 10590, "loss": 1.7886, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.661960852464717e-05, "epoch": 0.8451369216241738, "percentage": 16.9, "elapsed_time": "0:47:33", "remaining_time": "3:53:47"}
+{"current_steps": 1800, "total_steps": 10590, "loss": 1.8392, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.6582201588673816e-05, "epoch": 0.8498583569405099, "percentage": 17.0, "elapsed_time": "0:47:45", "remaining_time": "3:53:13"}
+{"current_steps": 1800, "total_steps": 10590, "loss": null, "eval_loss": 1.9019125699996948, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": null, "epoch": 0.8498583569405099, "percentage": 17.0, "elapsed_time": "0:47:45", "remaining_time": "3:53:13"}
+{"current_steps": 1810, "total_steps": 10590, "loss": 1.8178, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.6544603999018966e-05, "epoch": 0.8545797922568461, "percentage": 17.09, "elapsed_time": "0:48:39", "remaining_time": "3:56:01"}
+{"current_steps": 1820, "total_steps": 10590, "loss": 1.9773, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.6506816087813685e-05, "epoch": 0.8593012275731823, "percentage": 17.19, "elapsed_time": "0:48:52", "remaining_time": "3:55:30"}
+{"current_steps": 1830, "total_steps": 10590, "loss": 1.7574, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.646883818887025e-05, "epoch": 0.8640226628895185, "percentage": 17.28, "elapsed_time": "0:49:05", "remaining_time": "3:55:01"}
+{"current_steps": 1840, "total_steps": 10590, "loss": 1.9249, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.6430670637679295e-05, "epoch": 0.8687440982058546, "percentage": 17.37, "elapsed_time": "0:49:18", "remaining_time": "3:54:28"}
+{"current_steps": 1850, "total_steps": 10590, "loss": 1.8666, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.63923137714068e-05, "epoch": 0.8734655335221907, "percentage": 17.47, "elapsed_time": "0:49:30", "remaining_time": "3:53:53"}
+{"current_steps": 1860, "total_steps": 10590, "loss": 1.8948, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.635376792889111e-05, "epoch": 0.8781869688385269, "percentage": 17.56, "elapsed_time": "0:49:42", "remaining_time": "3:53:18"}
+{"current_steps": 1870, "total_steps": 10590, "loss": 1.8183, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.6315033450639996e-05, "epoch": 0.882908404154863, "percentage": 17.66, "elapsed_time": "0:49:54", "remaining_time": "3:52:43"}
+{"current_steps": 1880, "total_steps": 10590, "loss": 1.8918, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.6276110678827555e-05, "epoch": 0.8876298394711992, "percentage": 17.75, "elapsed_time": "0:50:06", "remaining_time": "3:52:09"}
+{"current_steps": 1890, "total_steps": 10590, "loss": 1.8303, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.6236999957291275e-05, "epoch": 0.8923512747875354, "percentage": 17.85, "elapsed_time": "0:50:18", "remaining_time": "3:51:32"}
+{"current_steps": 1900, "total_steps": 10590, "loss": 1.961, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.619770163152896e-05, "epoch": 0.8970727101038716, "percentage": 17.94, "elapsed_time": "0:50:30", "remaining_time": "3:51:00"}
+{"current_steps": 1900, "total_steps": 10590, "loss": null, "eval_loss": 1.8994309902191162, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": null, "epoch": 0.8970727101038716, "percentage": 17.94, "elapsed_time": "0:50:30", "remaining_time": "3:51:00"}
+{"current_steps": 1910, "total_steps": 10590, "loss": 1.8132, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.615821604869564e-05, "epoch": 0.9017941454202077, "percentage": 18.04, "elapsed_time": "0:51:24", "remaining_time": "3:53:35"}
+{"current_steps": 1920, "total_steps": 10590, "loss": 1.8418, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.61185435576006e-05, "epoch": 0.9065155807365439, "percentage": 18.13, "elapsed_time": "0:51:35", "remaining_time": "3:53:00"}
+{"current_steps": 1930, "total_steps": 10590, "loss": 1.8774, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.607868450870421e-05, "epoch": 0.9112370160528801, "percentage": 18.22, "elapsed_time": "0:51:48", "remaining_time": "3:52:26"}
+{"current_steps": 1940, "total_steps": 10590, "loss": 1.9419, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.6038639254114855e-05, "epoch": 0.9159584513692163, "percentage": 18.32, "elapsed_time": "0:51:59", "remaining_time": "3:51:48"}
+{"current_steps": 1950, "total_steps": 10590, "loss": 2.0166, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.599840814758587e-05, "epoch": 0.9206798866855525, "percentage": 18.41, "elapsed_time": "0:52:13", "remaining_time": "3:51:21"}
+{"current_steps": 1960, "total_steps": 10590, "loss": 1.9425, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.5957991544512316e-05, "epoch": 0.9254013220018886, "percentage": 18.51, "elapsed_time": "0:52:24", "remaining_time": "3:50:46"}
+{"current_steps": 1970, "total_steps": 10590, "loss": 1.8323, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.591738980192796e-05, "epoch": 0.9301227573182247, "percentage": 18.6, "elapsed_time": "0:52:36", "remaining_time": "3:50:12"}
+{"current_steps": 1980, "total_steps": 10590, "loss": 1.8055, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.587660327850203e-05, "epoch": 0.9348441926345609, "percentage": 18.7, "elapsed_time": "0:52:47", "remaining_time": "3:49:34"}
+{"current_steps": 1990, "total_steps": 10590, "loss": 1.7541, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.583563233453607e-05, "epoch": 0.939565627950897, "percentage": 18.79, "elapsed_time": "0:53:01", "remaining_time": "3:49:08"}
+{"current_steps": 2000, "total_steps": 10590, "loss": 1.8913, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.579447733196079e-05, "epoch": 0.9442870632672332, "percentage": 18.89, "elapsed_time": "0:53:13", "remaining_time": "3:48:37"}
+{"current_steps": 2000, "total_steps": 10590, "loss": null, "eval_loss": 1.8945337533950806, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": null, "epoch": 0.9442870632672332, "percentage": 18.89, "elapsed_time": "0:53:13", "remaining_time": "3:48:37"}
+{"current_steps": 2010, "total_steps": 10590, "loss": 1.8698, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.5753138634332835e-05, "epoch": 0.9490084985835694, "percentage": 18.98, "elapsed_time": "0:54:07", "remaining_time": "3:51:01"}
+{"current_steps": 2020, "total_steps": 10590, "loss": 1.893, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.5711616606831576e-05, "epoch": 0.9537299338999056, "percentage": 19.07, "elapsed_time": "0:54:19", "remaining_time": "3:50:26"}
+{"current_steps": 2030, "total_steps": 10590, "loss": 1.965, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.566991161625589e-05, "epoch": 0.9584513692162417, "percentage": 19.17, "elapsed_time": "0:54:29", "remaining_time": "3:49:48"}
+{"current_steps": 2040, "total_steps": 10590, "loss": 1.7615, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.562802403102093e-05, "epoch": 0.9631728045325779, "percentage": 19.26, "elapsed_time": "0:54:40", "remaining_time": "3:49:10"}
+{"current_steps": 2050, "total_steps": 10590, "loss": 1.8079, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.5585954221154856e-05, "epoch": 0.9678942398489141, "percentage": 19.36, "elapsed_time": "0:54:52", "remaining_time": "3:48:35"}
+{"current_steps": 2060, "total_steps": 10590, "loss": 1.8026, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.554370255829558e-05, "epoch": 0.9726156751652503, "percentage": 19.45, "elapsed_time": "0:55:04", "remaining_time": "3:48:01"}
+{"current_steps": 2070, "total_steps": 10590, "loss": 1.8584, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.550126941568744e-05, "epoch": 0.9773371104815864, "percentage": 19.55, "elapsed_time": "0:55:16", "remaining_time": "3:47:30"}
+{"current_steps": 2080, "total_steps": 10590, "loss": 1.7369, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.5458655168177974e-05, "epoch": 0.9820585457979226, "percentage": 19.64, "elapsed_time": "0:55:29", "remaining_time": "3:47:00"}
+{"current_steps": 2090, "total_steps": 10590, "loss": 1.8483, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.541586019221457e-05, "epoch": 0.9867799811142587, "percentage": 19.74, "elapsed_time": "0:55:42", "remaining_time": "3:46:34"}
+{"current_steps": 2100, "total_steps": 10590, "loss": 1.8187, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.5372884865841114e-05, "epoch": 0.9915014164305949, "percentage": 19.83, "elapsed_time": "0:55:54", "remaining_time": "3:46:02"}
+{"current_steps": 2100, "total_steps": 10590, "loss": null, "eval_loss": 1.8941270112991333, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": null, "epoch": 0.9915014164305949, "percentage": 19.83, "elapsed_time": "0:55:54", "remaining_time": "3:46:02"}
+{"current_steps": 2110, "total_steps": 10590, "loss": 1.9258, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.532972956869471e-05, "epoch": 0.996222851746931, "percentage": 19.92, "elapsed_time": "0:56:48", "remaining_time": "3:48:16"}
+{"current_steps": 2120, "total_steps": 10590, "loss": 1.8346, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.528639468200226e-05, "epoch": 1.0009442870632672, "percentage": 20.02, "elapsed_time": "0:56:59", "remaining_time": "3:47:42"}
+{"current_steps": 2130, "total_steps": 10590, "loss": 1.7275, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.524288058857717e-05, "epoch": 1.0056657223796035, "percentage": 20.11, "elapsed_time": "0:57:11", "remaining_time": "3:47:10"}
+{"current_steps": 2140, "total_steps": 10590, "loss": 1.6719, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.51991876728159e-05, "epoch": 1.0103871576959396, "percentage": 20.21, "elapsed_time": "0:57:25", "remaining_time": "3:46:44"}
+{"current_steps": 2150, "total_steps": 10590, "loss": 1.8067, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.515531632069461e-05, "epoch": 1.0151085930122756, "percentage": 20.3, "elapsed_time": "0:57:37", "remaining_time": "3:46:11"}
+{"current_steps": 2160, "total_steps": 10590, "loss": 1.7311, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.511126691976574e-05, "epoch": 1.019830028328612, "percentage": 20.4, "elapsed_time": "0:57:50", "remaining_time": "3:45:42"}
+{"current_steps": 2170, "total_steps": 10590, "loss": 1.8171, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.506703985915457e-05, "epoch": 1.024551463644948, "percentage": 20.49, "elapsed_time": "0:58:01", "remaining_time": "3:45:07"}
+{"current_steps": 2180, "total_steps": 10590, "loss": 1.6716, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.502263552955581e-05, "epoch": 1.0292728989612843, "percentage": 20.59, "elapsed_time": "0:58:13", "remaining_time": "3:44:37"}
+{"current_steps": 2190, "total_steps": 10590, "loss": 1.7456, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.497805432323015e-05, "epoch": 1.0339943342776203, "percentage": 20.68, "elapsed_time": "0:58:24", "remaining_time": "3:44:03"}
+{"current_steps": 2200, "total_steps": 10590, "loss": 1.7296, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.4933296634000734e-05, "epoch": 1.0387157695939566, "percentage": 20.77, "elapsed_time": "0:58:36", "remaining_time": "3:43:30"}
+{"current_steps": 2200, "total_steps": 10590, "loss": null, "eval_loss": 1.9005860090255737, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": null, "epoch": 1.0387157695939566, "percentage": 20.77, "elapsed_time": "0:58:36", "remaining_time": "3:43:30"}
+{"current_steps": 2210, "total_steps": 10590, "loss": 1.7202, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.4888362857249775e-05, "epoch": 1.0434372049102927, "percentage": 20.87, "elapsed_time": "0:59:29", "remaining_time": "3:45:36"}
+{"current_steps": 2220, "total_steps": 10590, "loss": 1.7879, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.484325338991499e-05, "epoch": 1.048158640226629, "percentage": 20.96, "elapsed_time": "0:59:40", "remaining_time": "3:45:00"}
+{"current_steps": 2230, "total_steps": 10590, "loss": 1.7202, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.4797968630486135e-05, "epoch": 1.052880075542965, "percentage": 21.06, "elapsed_time": "0:59:52", "remaining_time": "3:44:28"}
+{"current_steps": 2240, "total_steps": 10590, "loss": 1.6839, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.475250897900144e-05, "epoch": 1.0576015108593013, "percentage": 21.15, "elapsed_time": "1:00:04", "remaining_time": "3:43:55"}
+{"current_steps": 2250, "total_steps": 10590, "loss": 1.7942, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.470687483704413e-05, "epoch": 1.0623229461756374, "percentage": 21.25, "elapsed_time": "1:00:16", "remaining_time": "3:43:23"}
+{"current_steps": 2260, "total_steps": 10590, "loss": 1.7255, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.466106660773885e-05, "epoch": 1.0670443814919737, "percentage": 21.34, "elapsed_time": "1:00:27", "remaining_time": "3:42:51"}
+{"current_steps": 2270, "total_steps": 10590, "loss": 1.6271, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.4615084695748074e-05, "epoch": 1.0717658168083097, "percentage": 21.44, "elapsed_time": "1:00:39", "remaining_time": "3:42:21"}
+{"current_steps": 2280, "total_steps": 10590, "loss": 1.7444, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.456892950726861e-05, "epoch": 1.0764872521246458, "percentage": 21.53, "elapsed_time": "1:00:51", "remaining_time": "3:41:47"}
+{"current_steps": 2290, "total_steps": 10590, "loss": 1.7943, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.452260145002791e-05, "epoch": 1.081208687440982, "percentage": 21.62, "elapsed_time": "1:01:03", "remaining_time": "3:41:16"}
+{"current_steps": 2300, "total_steps": 10590, "loss": 1.6184, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.447610093328056e-05, "epoch": 1.0859301227573182, "percentage": 21.72, "elapsed_time": "1:01:15", "remaining_time": "3:40:46"}
+{"current_steps": 2300, "total_steps": 10590, "loss": null, "eval_loss": 1.9040113687515259, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": null, "epoch": 1.0859301227573182, "percentage": 21.72, "elapsed_time": "1:01:15", "remaining_time": "3:40:46"}
+{"current_steps": 2310, "total_steps": 10590, "loss": 1.7606, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.4429428367804605e-05, "epoch": 1.0906515580736544, "percentage": 21.81, "elapsed_time": "1:02:08", "remaining_time": "3:42:45"}
+{"current_steps": 2320, "total_steps": 10590, "loss": 1.7419, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.438258416589794e-05, "epoch": 1.0953729933899905, "percentage": 21.91, "elapsed_time": "1:02:21", "remaining_time": "3:42:16"}
+{"current_steps": 2330, "total_steps": 10590, "loss": 1.6251, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.43355687413747e-05, "epoch": 1.1000944287063268, "percentage": 22.0, "elapsed_time": "1:02:32", "remaining_time": "3:41:44"}
+{"current_steps": 2340, "total_steps": 10590, "loss": 1.7576, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.428838250956153e-05, "epoch": 1.1048158640226629, "percentage": 22.1, "elapsed_time": "1:02:46", "remaining_time": "3:41:18"}
+{"current_steps": 2350, "total_steps": 10590, "loss": 1.758, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.4241025887293976e-05, "epoch": 1.1095372993389991, "percentage": 22.19, "elapsed_time": "1:02:58", "remaining_time": "3:40:47"}
+{"current_steps": 2360, "total_steps": 10590, "loss": 1.759, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.419349929291279e-05, "epoch": 1.1142587346553352, "percentage": 22.29, "elapsed_time": "1:03:10", "remaining_time": "3:40:19"}
+{"current_steps": 2370, "total_steps": 10590, "loss": 1.7568, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.414580314626023e-05, "epoch": 1.1189801699716715, "percentage": 22.38, "elapsed_time": "1:03:23", "remaining_time": "3:39:51"}
+{"current_steps": 2380, "total_steps": 10590, "loss": 1.684, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.4097937868676345e-05, "epoch": 1.1237016052880076, "percentage": 22.47, "elapsed_time": "1:03:34", "remaining_time": "3:39:20"}
+{"current_steps": 2390, "total_steps": 10590, "loss": 1.6816, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.404990388299527e-05, "epoch": 1.1284230406043436, "percentage": 22.57, "elapsed_time": "1:03:46", "remaining_time": "3:38:50"}
+{"current_steps": 2400, "total_steps": 10590, "loss": 1.6973, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.4001701613541456e-05, "epoch": 1.13314447592068, "percentage": 22.66, "elapsed_time": "1:03:59", "remaining_time": "3:38:21"}
+{"current_steps": 2400, "total_steps": 10590, "loss": null, "eval_loss": 1.9056047201156616, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": null, "epoch": 1.13314447592068, "percentage": 22.66, "elapsed_time": "1:03:59", "remaining_time": "3:38:21"}
+{"current_steps": 2400, "total_steps": 10590, "loss": null, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": null, "epoch": 1.13314447592068, "percentage": 22.66, "elapsed_time": "1:03:59", "remaining_time": "3:38:21"}
+{"current_steps": 112, "total_steps": 112, "loss": null, "eval_loss": 1.8941270112991333, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": null, "epoch": 1.13314447592068, "percentage": 100.0, "elapsed_time": "1:05:30", "remaining_time": "0:00:00"}
diff --git a/llama2_13b_peft/topical_chat/trainer_state.json b/llama2_13b_peft/topical_chat/trainer_state.json
new file mode 100644
index 0000000000000000000000000000000000000000..0ac5c19fcd44d9de7cbdcac1a3a93337c44a76d3
--- /dev/null
+++ b/llama2_13b_peft/topical_chat/trainer_state.json
@@ -0,0 +1,1902 @@
+{
+ "best_metric": 1.8941270112991333,
+ "best_model_checkpoint": "ckpt/llama2_13b_other/topical_chat_no_sys/checkpoint-2100",
+ "epoch": 1.13314447592068,
+ "eval_steps": 100,
+ "global_step": 2400,
+ "is_hyper_param_search": false,
+ "is_local_process_zero": true,
+ "is_world_process_zero": true,
+ "log_history": [
+ {
+ "epoch": 0.004721435316336166,
+ "grad_norm": 0.80355304479599,
+ "learning_rate": 2.5e-05,
+ "loss": 2.8563,
+ "step": 10
+ },
+ {
+ "epoch": 0.009442870632672332,
+ "grad_norm": 0.9893380403518677,
+ "learning_rate": 5e-05,
+ "loss": 2.6853,
+ "step": 20
+ },
+ {
+ "epoch": 0.014164305949008499,
+ "grad_norm": 0.6218120455741882,
+ "learning_rate": 4.999988957701981e-05,
+ "loss": 2.1764,
+ "step": 30
+ },
+ {
+ "epoch": 0.018885741265344664,
+ "grad_norm": 0.7985787987709045,
+ "learning_rate": 4.99995583090547e-05,
+ "loss": 2.15,
+ "step": 40
+ },
+ {
+ "epoch": 0.023607176581680833,
+ "grad_norm": 0.6444200873374939,
+ "learning_rate": 4.999900619903104e-05,
+ "loss": 2.1033,
+ "step": 50
+ },
+ {
+ "epoch": 0.028328611898016998,
+ "grad_norm": 0.7646850943565369,
+ "learning_rate": 4.999823325182607e-05,
+ "loss": 2.1417,
+ "step": 60
+ },
+ {
+ "epoch": 0.033050047214353166,
+ "grad_norm": 1.0737015008926392,
+ "learning_rate": 4.9997239474267886e-05,
+ "loss": 2.1031,
+ "step": 70
+ },
+ {
+ "epoch": 0.03777148253068933,
+ "grad_norm": 0.8393265604972839,
+ "learning_rate": 4.9996024875135365e-05,
+ "loss": 1.9967,
+ "step": 80
+ },
+ {
+ "epoch": 0.042492917847025496,
+ "grad_norm": 0.9493579864501953,
+ "learning_rate": 4.999458946515808e-05,
+ "loss": 2.1262,
+ "step": 90
+ },
+ {
+ "epoch": 0.047214353163361665,
+ "grad_norm": 1.1938248872756958,
+ "learning_rate": 4.9992933257016194e-05,
+ "loss": 2.1904,
+ "step": 100
+ },
+ {
+ "epoch": 0.047214353163361665,
+ "eval_loss": 2.113694906234741,
+ "eval_runtime": 40.9172,
+ "eval_samples_per_second": 21.8,
+ "eval_steps_per_second": 2.737,
+ "step": 100
+ },
+ {
+ "epoch": 0.05193578847969783,
+ "grad_norm": 1.044633388519287,
+ "learning_rate": 4.99910562653404e-05,
+ "loss": 2.062,
+ "step": 110
+ },
+ {
+ "epoch": 0.056657223796033995,
+ "grad_norm": 0.8298002481460571,
+ "learning_rate": 4.9988958506711735e-05,
+ "loss": 1.9974,
+ "step": 120
+ },
+ {
+ "epoch": 0.061378659112370164,
+ "grad_norm": 0.9604804515838623,
+ "learning_rate": 4.9986639999661454e-05,
+ "loss": 2.124,
+ "step": 130
+ },
+ {
+ "epoch": 0.06610009442870633,
+ "grad_norm": 1.0080113410949707,
+ "learning_rate": 4.998410076467088e-05,
+ "loss": 1.9903,
+ "step": 140
+ },
+ {
+ "epoch": 0.0708215297450425,
+ "grad_norm": 1.0866179466247559,
+ "learning_rate": 4.9981340824171194e-05,
+ "loss": 2.1054,
+ "step": 150
+ },
+ {
+ "epoch": 0.07554296506137866,
+ "grad_norm": 0.8573047518730164,
+ "learning_rate": 4.997836020254328e-05,
+ "loss": 2.0274,
+ "step": 160
+ },
+ {
+ "epoch": 0.08026440037771483,
+ "grad_norm": 1.019667625427246,
+ "learning_rate": 4.997515892611746e-05,
+ "loss": 2.1484,
+ "step": 170
+ },
+ {
+ "epoch": 0.08498583569405099,
+ "grad_norm": 0.9652569890022278,
+ "learning_rate": 4.9971737023173286e-05,
+ "loss": 1.9468,
+ "step": 180
+ },
+ {
+ "epoch": 0.08970727101038715,
+ "grad_norm": 1.3909372091293335,
+ "learning_rate": 4.996809452393931e-05,
+ "loss": 2.0637,
+ "step": 190
+ },
+ {
+ "epoch": 0.09442870632672333,
+ "grad_norm": 0.7734001874923706,
+ "learning_rate": 4.996423146059277e-05,
+ "loss": 1.9627,
+ "step": 200
+ },
+ {
+ "epoch": 0.09442870632672333,
+ "eval_loss": 2.058908462524414,
+ "eval_runtime": 41.0852,
+ "eval_samples_per_second": 21.711,
+ "eval_steps_per_second": 2.726,
+ "step": 200
+ },
+ {
+ "epoch": 0.09915014164305949,
+ "grad_norm": 0.8814783096313477,
+ "learning_rate": 4.996014786725935e-05,
+ "loss": 1.9765,
+ "step": 210
+ },
+ {
+ "epoch": 0.10387157695939565,
+ "grad_norm": 1.1643282175064087,
+ "learning_rate": 4.9955843780012846e-05,
+ "loss": 2.0651,
+ "step": 220
+ },
+ {
+ "epoch": 0.10859301227573183,
+ "grad_norm": 1.2584123611450195,
+ "learning_rate": 4.995131923687488e-05,
+ "loss": 1.9729,
+ "step": 230
+ },
+ {
+ "epoch": 0.11331444759206799,
+ "grad_norm": 1.1681088209152222,
+ "learning_rate": 4.994657427781453e-05,
+ "loss": 2.0653,
+ "step": 240
+ },
+ {
+ "epoch": 0.11803588290840415,
+ "grad_norm": 1.009667158126831,
+ "learning_rate": 4.994160894474799e-05,
+ "loss": 2.185,
+ "step": 250
+ },
+ {
+ "epoch": 0.12275731822474033,
+ "grad_norm": 1.0578892230987549,
+ "learning_rate": 4.993642328153822e-05,
+ "loss": 2.0668,
+ "step": 260
+ },
+ {
+ "epoch": 0.1274787535410765,
+ "grad_norm": 1.0033955574035645,
+ "learning_rate": 4.993101733399453e-05,
+ "loss": 2.0643,
+ "step": 270
+ },
+ {
+ "epoch": 0.13220018885741266,
+ "grad_norm": 1.250969648361206,
+ "learning_rate": 4.9925391149872184e-05,
+ "loss": 2.0303,
+ "step": 280
+ },
+ {
+ "epoch": 0.1369216241737488,
+ "grad_norm": 1.1165454387664795,
+ "learning_rate": 4.9919544778871985e-05,
+ "loss": 1.9509,
+ "step": 290
+ },
+ {
+ "epoch": 0.141643059490085,
+ "grad_norm": 0.8889014720916748,
+ "learning_rate": 4.991347827263982e-05,
+ "loss": 2.0172,
+ "step": 300
+ },
+ {
+ "epoch": 0.141643059490085,
+ "eval_loss": 2.0220935344696045,
+ "eval_runtime": 41.0678,
+ "eval_samples_per_second": 21.72,
+ "eval_steps_per_second": 2.727,
+ "step": 300
+ },
+ {
+ "epoch": 0.14636449480642116,
+ "grad_norm": 1.275136947631836,
+ "learning_rate": 4.990719168476625e-05,
+ "loss": 1.9862,
+ "step": 310
+ },
+ {
+ "epoch": 0.1510859301227573,
+ "grad_norm": 0.959540069103241,
+ "learning_rate": 4.990068507078595e-05,
+ "loss": 1.9686,
+ "step": 320
+ },
+ {
+ "epoch": 0.1558073654390935,
+ "grad_norm": 0.8156929612159729,
+ "learning_rate": 4.989395848817731e-05,
+ "loss": 1.9751,
+ "step": 330
+ },
+ {
+ "epoch": 0.16052880075542966,
+ "grad_norm": 1.4008054733276367,
+ "learning_rate": 4.988701199636186e-05,
+ "loss": 1.9844,
+ "step": 340
+ },
+ {
+ "epoch": 0.1652502360717658,
+ "grad_norm": 1.4623981714248657,
+ "learning_rate": 4.987984565670382e-05,
+ "loss": 2.1042,
+ "step": 350
+ },
+ {
+ "epoch": 0.16997167138810199,
+ "grad_norm": 1.0944546461105347,
+ "learning_rate": 4.9872459532509436e-05,
+ "loss": 1.9839,
+ "step": 360
+ },
+ {
+ "epoch": 0.17469310670443816,
+ "grad_norm": 1.0253016948699951,
+ "learning_rate": 4.9864853689026556e-05,
+ "loss": 1.9492,
+ "step": 370
+ },
+ {
+ "epoch": 0.1794145420207743,
+ "grad_norm": 1.1482458114624023,
+ "learning_rate": 4.985702819344397e-05,
+ "loss": 1.9942,
+ "step": 380
+ },
+ {
+ "epoch": 0.18413597733711048,
+ "grad_norm": 1.094506859779358,
+ "learning_rate": 4.984898311489085e-05,
+ "loss": 1.9091,
+ "step": 390
+ },
+ {
+ "epoch": 0.18885741265344666,
+ "grad_norm": 1.3459991216659546,
+ "learning_rate": 4.98407185244361e-05,
+ "loss": 1.8965,
+ "step": 400
+ },
+ {
+ "epoch": 0.18885741265344666,
+ "eval_loss": 1.9968496561050415,
+ "eval_runtime": 41.0802,
+ "eval_samples_per_second": 21.714,
+ "eval_steps_per_second": 2.726,
+ "step": 400
+ },
+ {
+ "epoch": 0.1935788479697828,
+ "grad_norm": 1.5208889245986938,
+ "learning_rate": 4.983223449508779e-05,
+ "loss": 1.8476,
+ "step": 410
+ },
+ {
+ "epoch": 0.19830028328611898,
+ "grad_norm": 1.0982236862182617,
+ "learning_rate": 4.982353110179246e-05,
+ "loss": 1.9005,
+ "step": 420
+ },
+ {
+ "epoch": 0.20302171860245516,
+ "grad_norm": 1.247300624847412,
+ "learning_rate": 4.9814608421434485e-05,
+ "loss": 1.9098,
+ "step": 430
+ },
+ {
+ "epoch": 0.2077431539187913,
+ "grad_norm": 0.9733744859695435,
+ "learning_rate": 4.9805466532835376e-05,
+ "loss": 1.912,
+ "step": 440
+ },
+ {
+ "epoch": 0.21246458923512748,
+ "grad_norm": 1.2082431316375732,
+ "learning_rate": 4.979610551675311e-05,
+ "loss": 2.044,
+ "step": 450
+ },
+ {
+ "epoch": 0.21718602455146366,
+ "grad_norm": 0.9695537090301514,
+ "learning_rate": 4.978652545588137e-05,
+ "loss": 2.0439,
+ "step": 460
+ },
+ {
+ "epoch": 0.2219074598677998,
+ "grad_norm": 1.1563127040863037,
+ "learning_rate": 4.977672643484889e-05,
+ "loss": 2.017,
+ "step": 470
+ },
+ {
+ "epoch": 0.22662889518413598,
+ "grad_norm": 1.26643705368042,
+ "learning_rate": 4.976670854021862e-05,
+ "loss": 1.9652,
+ "step": 480
+ },
+ {
+ "epoch": 0.23135033050047216,
+ "grad_norm": 1.539183259010315,
+ "learning_rate": 4.975647186048703e-05,
+ "loss": 1.9621,
+ "step": 490
+ },
+ {
+ "epoch": 0.2360717658168083,
+ "grad_norm": 1.4867112636566162,
+ "learning_rate": 4.97460164860833e-05,
+ "loss": 1.9534,
+ "step": 500
+ },
+ {
+ "epoch": 0.2360717658168083,
+ "eval_loss": 1.982257604598999,
+ "eval_runtime": 41.1055,
+ "eval_samples_per_second": 21.7,
+ "eval_steps_per_second": 2.725,
+ "step": 500
+ },
+ {
+ "epoch": 0.24079320113314448,
+ "grad_norm": 1.3039374351501465,
+ "learning_rate": 4.973534250936851e-05,
+ "loss": 1.9625,
+ "step": 510
+ },
+ {
+ "epoch": 0.24551463644948066,
+ "grad_norm": 1.215240478515625,
+ "learning_rate": 4.972445002463485e-05,
+ "loss": 1.9728,
+ "step": 520
+ },
+ {
+ "epoch": 0.2502360717658168,
+ "grad_norm": 1.4657334089279175,
+ "learning_rate": 4.971333912810476e-05,
+ "loss": 1.9505,
+ "step": 530
+ },
+ {
+ "epoch": 0.254957507082153,
+ "grad_norm": 1.0312436819076538,
+ "learning_rate": 4.970200991793012e-05,
+ "loss": 1.9735,
+ "step": 540
+ },
+ {
+ "epoch": 0.25967894239848915,
+ "grad_norm": 1.0905646085739136,
+ "learning_rate": 4.969046249419133e-05,
+ "loss": 1.9265,
+ "step": 550
+ },
+ {
+ "epoch": 0.26440037771482533,
+ "grad_norm": 1.271078109741211,
+ "learning_rate": 4.967869695889647e-05,
+ "loss": 1.9292,
+ "step": 560
+ },
+ {
+ "epoch": 0.26912181303116145,
+ "grad_norm": 1.5857294797897339,
+ "learning_rate": 4.966671341598037e-05,
+ "loss": 1.9518,
+ "step": 570
+ },
+ {
+ "epoch": 0.2738432483474976,
+ "grad_norm": 1.201617956161499,
+ "learning_rate": 4.965451197130373e-05,
+ "loss": 1.9796,
+ "step": 580
+ },
+ {
+ "epoch": 0.2785646836638338,
+ "grad_norm": 1.4783813953399658,
+ "learning_rate": 4.964209273265212e-05,
+ "loss": 1.9494,
+ "step": 590
+ },
+ {
+ "epoch": 0.28328611898017,
+ "grad_norm": 1.237426996231079,
+ "learning_rate": 4.9629455809735105e-05,
+ "loss": 1.8621,
+ "step": 600
+ },
+ {
+ "epoch": 0.28328611898017,
+ "eval_loss": 1.9678794145584106,
+ "eval_runtime": 41.1041,
+ "eval_samples_per_second": 21.701,
+ "eval_steps_per_second": 2.725,
+ "step": 600
+ },
+ {
+ "epoch": 0.28800755429650615,
+ "grad_norm": 1.0407463312149048,
+ "learning_rate": 4.9616601314185206e-05,
+ "loss": 1.8698,
+ "step": 610
+ },
+ {
+ "epoch": 0.2927289896128423,
+ "grad_norm": 1.175148844718933,
+ "learning_rate": 4.9603529359556975e-05,
+ "loss": 1.9089,
+ "step": 620
+ },
+ {
+ "epoch": 0.29745042492917845,
+ "grad_norm": 1.4049919843673706,
+ "learning_rate": 4.959024006132593e-05,
+ "loss": 2.026,
+ "step": 630
+ },
+ {
+ "epoch": 0.3021718602455146,
+ "grad_norm": 1.3838235139846802,
+ "learning_rate": 4.95767335368876e-05,
+ "loss": 1.9059,
+ "step": 640
+ },
+ {
+ "epoch": 0.3068932955618508,
+ "grad_norm": 1.1748583316802979,
+ "learning_rate": 4.956300990555643e-05,
+ "loss": 1.973,
+ "step": 650
+ },
+ {
+ "epoch": 0.311614730878187,
+ "grad_norm": 1.4677692651748657,
+ "learning_rate": 4.954906928856476e-05,
+ "loss": 1.9296,
+ "step": 660
+ },
+ {
+ "epoch": 0.31633616619452315,
+ "grad_norm": 1.3763841390609741,
+ "learning_rate": 4.953491180906175e-05,
+ "loss": 1.9321,
+ "step": 670
+ },
+ {
+ "epoch": 0.3210576015108593,
+ "grad_norm": 1.053803563117981,
+ "learning_rate": 4.952053759211229e-05,
+ "loss": 2.0338,
+ "step": 680
+ },
+ {
+ "epoch": 0.32577903682719545,
+ "grad_norm": 1.45322585105896,
+ "learning_rate": 4.9505946764695885e-05,
+ "loss": 1.9328,
+ "step": 690
+ },
+ {
+ "epoch": 0.3305004721435316,
+ "grad_norm": 1.3070508241653442,
+ "learning_rate": 4.949113945570555e-05,
+ "loss": 1.9777,
+ "step": 700
+ },
+ {
+ "epoch": 0.3305004721435316,
+ "eval_loss": 1.9610685110092163,
+ "eval_runtime": 41.0861,
+ "eval_samples_per_second": 21.71,
+ "eval_steps_per_second": 2.726,
+ "step": 700
+ },
+ {
+ "epoch": 0.3352219074598678,
+ "grad_norm": 1.031301498413086,
+ "learning_rate": 4.947611579594666e-05,
+ "loss": 2.0113,
+ "step": 710
+ },
+ {
+ "epoch": 0.33994334277620397,
+ "grad_norm": 1.3410820960998535,
+ "learning_rate": 4.9460875918135804e-05,
+ "loss": 1.9097,
+ "step": 720
+ },
+ {
+ "epoch": 0.34466477809254015,
+ "grad_norm": 1.1345252990722656,
+ "learning_rate": 4.944541995689958e-05,
+ "loss": 1.9226,
+ "step": 730
+ },
+ {
+ "epoch": 0.3493862134088763,
+ "grad_norm": 1.2572389841079712,
+ "learning_rate": 4.9429748048773475e-05,
+ "loss": 1.9583,
+ "step": 740
+ },
+ {
+ "epoch": 0.35410764872521244,
+ "grad_norm": 1.2059199810028076,
+ "learning_rate": 4.941386033220058e-05,
+ "loss": 1.9101,
+ "step": 750
+ },
+ {
+ "epoch": 0.3588290840415486,
+ "grad_norm": 1.2732455730438232,
+ "learning_rate": 4.9397756947530414e-05,
+ "loss": 1.9238,
+ "step": 760
+ },
+ {
+ "epoch": 0.3635505193578848,
+ "grad_norm": 1.2681666612625122,
+ "learning_rate": 4.938143803701769e-05,
+ "loss": 1.867,
+ "step": 770
+ },
+ {
+ "epoch": 0.36827195467422097,
+ "grad_norm": 1.1062850952148438,
+ "learning_rate": 4.9364903744821014e-05,
+ "loss": 1.9233,
+ "step": 780
+ },
+ {
+ "epoch": 0.37299338999055714,
+ "grad_norm": 1.5268208980560303,
+ "learning_rate": 4.934815421700165e-05,
+ "loss": 1.8599,
+ "step": 790
+ },
+ {
+ "epoch": 0.3777148253068933,
+ "grad_norm": 1.4348393678665161,
+ "learning_rate": 4.933118960152222e-05,
+ "loss": 2.0865,
+ "step": 800
+ },
+ {
+ "epoch": 0.3777148253068933,
+ "eval_loss": 1.954448938369751,
+ "eval_runtime": 41.0954,
+ "eval_samples_per_second": 21.706,
+ "eval_steps_per_second": 2.725,
+ "step": 800
+ },
+ {
+ "epoch": 0.38243626062322944,
+ "grad_norm": 1.1293368339538574,
+ "learning_rate": 4.931401004824541e-05,
+ "loss": 1.9036,
+ "step": 810
+ },
+ {
+ "epoch": 0.3871576959395656,
+ "grad_norm": 1.2953282594680786,
+ "learning_rate": 4.92966157089326e-05,
+ "loss": 1.92,
+ "step": 820
+ },
+ {
+ "epoch": 0.3918791312559018,
+ "grad_norm": 1.5220308303833008,
+ "learning_rate": 4.927900673724259e-05,
+ "loss": 1.9894,
+ "step": 830
+ },
+ {
+ "epoch": 0.39660056657223797,
+ "grad_norm": 1.3064740896224976,
+ "learning_rate": 4.9261183288730176e-05,
+ "loss": 1.9978,
+ "step": 840
+ },
+ {
+ "epoch": 0.40132200188857414,
+ "grad_norm": 1.2408039569854736,
+ "learning_rate": 4.9243145520844834e-05,
+ "loss": 1.8262,
+ "step": 850
+ },
+ {
+ "epoch": 0.4060434372049103,
+ "grad_norm": 1.314778208732605,
+ "learning_rate": 4.9224893592929275e-05,
+ "loss": 1.907,
+ "step": 860
+ },
+ {
+ "epoch": 0.41076487252124644,
+ "grad_norm": 1.2585623264312744,
+ "learning_rate": 4.92064276662181e-05,
+ "loss": 1.9766,
+ "step": 870
+ },
+ {
+ "epoch": 0.4154863078375826,
+ "grad_norm": 1.2925095558166504,
+ "learning_rate": 4.9187747903836303e-05,
+ "loss": 1.9038,
+ "step": 880
+ },
+ {
+ "epoch": 0.4202077431539188,
+ "grad_norm": 1.3546730279922485,
+ "learning_rate": 4.9168854470797904e-05,
+ "loss": 1.9733,
+ "step": 890
+ },
+ {
+ "epoch": 0.42492917847025496,
+ "grad_norm": 1.3911082744598389,
+ "learning_rate": 4.914974753400443e-05,
+ "loss": 1.9662,
+ "step": 900
+ },
+ {
+ "epoch": 0.42492917847025496,
+ "eval_loss": 1.946061372756958,
+ "eval_runtime": 41.0731,
+ "eval_samples_per_second": 21.717,
+ "eval_steps_per_second": 2.727,
+ "step": 900
+ },
+ {
+ "epoch": 0.42965061378659114,
+ "grad_norm": 1.3016057014465332,
+ "learning_rate": 4.913042726224347e-05,
+ "loss": 2.0209,
+ "step": 910
+ },
+ {
+ "epoch": 0.4343720491029273,
+ "grad_norm": 1.200618863105774,
+ "learning_rate": 4.911089382618718e-05,
+ "loss": 1.8343,
+ "step": 920
+ },
+ {
+ "epoch": 0.43909348441926344,
+ "grad_norm": 1.2423232793807983,
+ "learning_rate": 4.909114739839079e-05,
+ "loss": 1.952,
+ "step": 930
+ },
+ {
+ "epoch": 0.4438149197355996,
+ "grad_norm": 1.1997716426849365,
+ "learning_rate": 4.907118815329104e-05,
+ "loss": 1.9064,
+ "step": 940
+ },
+ {
+ "epoch": 0.4485363550519358,
+ "grad_norm": 1.3008543252944946,
+ "learning_rate": 4.905101626720469e-05,
+ "loss": 1.883,
+ "step": 950
+ },
+ {
+ "epoch": 0.45325779036827196,
+ "grad_norm": 1.4466650485992432,
+ "learning_rate": 4.903063191832691e-05,
+ "loss": 1.8716,
+ "step": 960
+ },
+ {
+ "epoch": 0.45797922568460814,
+ "grad_norm": 1.3495376110076904,
+ "learning_rate": 4.901003528672975e-05,
+ "loss": 1.9534,
+ "step": 970
+ },
+ {
+ "epoch": 0.4627006610009443,
+ "grad_norm": 1.4013429880142212,
+ "learning_rate": 4.898922655436052e-05,
+ "loss": 1.8552,
+ "step": 980
+ },
+ {
+ "epoch": 0.46742209631728043,
+ "grad_norm": 1.716334581375122,
+ "learning_rate": 4.8968205905040207e-05,
+ "loss": 1.974,
+ "step": 990
+ },
+ {
+ "epoch": 0.4721435316336166,
+ "grad_norm": 1.435140609741211,
+ "learning_rate": 4.894697352446182e-05,
+ "loss": 1.8352,
+ "step": 1000
+ },
+ {
+ "epoch": 0.4721435316336166,
+ "eval_loss": 1.9375569820404053,
+ "eval_runtime": 41.1017,
+ "eval_samples_per_second": 21.702,
+ "eval_steps_per_second": 2.725,
+ "step": 1000
+ },
+ {
+ "epoch": 0.4768649669499528,
+ "grad_norm": 1.7291275262832642,
+ "learning_rate": 4.8925529600188794e-05,
+ "loss": 1.9139,
+ "step": 1010
+ },
+ {
+ "epoch": 0.48158640226628896,
+ "grad_norm": 1.2678258419036865,
+ "learning_rate": 4.8903874321653274e-05,
+ "loss": 1.8118,
+ "step": 1020
+ },
+ {
+ "epoch": 0.48630783758262514,
+ "grad_norm": 1.5206748247146606,
+ "learning_rate": 4.88820078801545e-05,
+ "loss": 1.9924,
+ "step": 1030
+ },
+ {
+ "epoch": 0.4910292728989613,
+ "grad_norm": 1.4560861587524414,
+ "learning_rate": 4.885993046885708e-05,
+ "loss": 1.8814,
+ "step": 1040
+ },
+ {
+ "epoch": 0.49575070821529743,
+ "grad_norm": 1.150933861732483,
+ "learning_rate": 4.883764228278931e-05,
+ "loss": 1.8928,
+ "step": 1050
+ },
+ {
+ "epoch": 0.5004721435316336,
+ "grad_norm": 1.4958614110946655,
+ "learning_rate": 4.881514351884141e-05,
+ "loss": 1.9071,
+ "step": 1060
+ },
+ {
+ "epoch": 0.5051935788479698,
+ "grad_norm": 1.296898603439331,
+ "learning_rate": 4.879243437576383e-05,
+ "loss": 1.939,
+ "step": 1070
+ },
+ {
+ "epoch": 0.509915014164306,
+ "grad_norm": 1.6953368186950684,
+ "learning_rate": 4.876951505416547e-05,
+ "loss": 1.8603,
+ "step": 1080
+ },
+ {
+ "epoch": 0.5146364494806421,
+ "grad_norm": 1.2347371578216553,
+ "learning_rate": 4.8746385756511915e-05,
+ "loss": 1.86,
+ "step": 1090
+ },
+ {
+ "epoch": 0.5193578847969783,
+ "grad_norm": 2.050234794616699,
+ "learning_rate": 4.872304668712364e-05,
+ "loss": 1.8973,
+ "step": 1100
+ },
+ {
+ "epoch": 0.5193578847969783,
+ "eval_loss": 1.932855248451233,
+ "eval_runtime": 41.1127,
+ "eval_samples_per_second": 21.696,
+ "eval_steps_per_second": 2.724,
+ "step": 1100
+ },
+ {
+ "epoch": 0.5240793201133145,
+ "grad_norm": 1.5210719108581543,
+ "learning_rate": 4.8699498052174205e-05,
+ "loss": 1.9125,
+ "step": 1110
+ },
+ {
+ "epoch": 0.5288007554296507,
+ "grad_norm": 1.17880380153656,
+ "learning_rate": 4.867574005968847e-05,
+ "loss": 2.001,
+ "step": 1120
+ },
+ {
+ "epoch": 0.5335221907459868,
+ "grad_norm": 1.2916531562805176,
+ "learning_rate": 4.8651772919540686e-05,
+ "loss": 1.8442,
+ "step": 1130
+ },
+ {
+ "epoch": 0.5382436260623229,
+ "grad_norm": 1.2535613775253296,
+ "learning_rate": 4.862759684345269e-05,
+ "loss": 1.9932,
+ "step": 1140
+ },
+ {
+ "epoch": 0.5429650613786591,
+ "grad_norm": 1.2579909563064575,
+ "learning_rate": 4.860321204499205e-05,
+ "loss": 1.8388,
+ "step": 1150
+ },
+ {
+ "epoch": 0.5476864966949953,
+ "grad_norm": 1.207390546798706,
+ "learning_rate": 4.857861873957011e-05,
+ "loss": 1.7975,
+ "step": 1160
+ },
+ {
+ "epoch": 0.5524079320113314,
+ "grad_norm": 1.574354648590088,
+ "learning_rate": 4.855381714444018e-05,
+ "loss": 1.848,
+ "step": 1170
+ },
+ {
+ "epoch": 0.5571293673276676,
+ "grad_norm": 1.6396162509918213,
+ "learning_rate": 4.8528807478695535e-05,
+ "loss": 1.9311,
+ "step": 1180
+ },
+ {
+ "epoch": 0.5618508026440038,
+ "grad_norm": 1.4158663749694824,
+ "learning_rate": 4.850358996326753e-05,
+ "loss": 1.8705,
+ "step": 1190
+ },
+ {
+ "epoch": 0.56657223796034,
+ "grad_norm": 1.4436795711517334,
+ "learning_rate": 4.84781648209236e-05,
+ "loss": 1.9688,
+ "step": 1200
+ },
+ {
+ "epoch": 0.56657223796034,
+ "eval_loss": 1.9264005422592163,
+ "eval_runtime": 41.1199,
+ "eval_samples_per_second": 21.693,
+ "eval_steps_per_second": 2.724,
+ "step": 1200
+ },
+ {
+ "epoch": 0.5712936732766761,
+ "grad_norm": 1.747074007987976,
+ "learning_rate": 4.8452532276265364e-05,
+ "loss": 1.9038,
+ "step": 1210
+ },
+ {
+ "epoch": 0.5760151085930123,
+ "grad_norm": 1.5841553211212158,
+ "learning_rate": 4.842669255572656e-05,
+ "loss": 1.9581,
+ "step": 1220
+ },
+ {
+ "epoch": 0.5807365439093485,
+ "grad_norm": 1.4784042835235596,
+ "learning_rate": 4.8400645887571126e-05,
+ "loss": 1.8913,
+ "step": 1230
+ },
+ {
+ "epoch": 0.5854579792256847,
+ "grad_norm": 1.332627296447754,
+ "learning_rate": 4.837439250189111e-05,
+ "loss": 1.8597,
+ "step": 1240
+ },
+ {
+ "epoch": 0.5901794145420207,
+ "grad_norm": 1.0213313102722168,
+ "learning_rate": 4.834793263060468e-05,
+ "loss": 1.9072,
+ "step": 1250
+ },
+ {
+ "epoch": 0.5949008498583569,
+ "grad_norm": 1.4618474245071411,
+ "learning_rate": 4.832126650745405e-05,
+ "loss": 1.9781,
+ "step": 1260
+ },
+ {
+ "epoch": 0.5996222851746931,
+ "grad_norm": 1.6018431186676025,
+ "learning_rate": 4.829439436800346e-05,
+ "loss": 1.885,
+ "step": 1270
+ },
+ {
+ "epoch": 0.6043437204910292,
+ "grad_norm": 1.3701698780059814,
+ "learning_rate": 4.8267316449637054e-05,
+ "loss": 1.8891,
+ "step": 1280
+ },
+ {
+ "epoch": 0.6090651558073654,
+ "grad_norm": 1.6125386953353882,
+ "learning_rate": 4.8240032991556765e-05,
+ "loss": 1.8654,
+ "step": 1290
+ },
+ {
+ "epoch": 0.6137865911237016,
+ "grad_norm": 1.3987213373184204,
+ "learning_rate": 4.821254423478027e-05,
+ "loss": 1.8383,
+ "step": 1300
+ },
+ {
+ "epoch": 0.6137865911237016,
+ "eval_loss": 1.9191977977752686,
+ "eval_runtime": 41.0939,
+ "eval_samples_per_second": 21.706,
+ "eval_steps_per_second": 2.725,
+ "step": 1300
+ },
+ {
+ "epoch": 0.6185080264400378,
+ "grad_norm": 1.4958666563034058,
+ "learning_rate": 4.8184850422138795e-05,
+ "loss": 1.8538,
+ "step": 1310
+ },
+ {
+ "epoch": 0.623229461756374,
+ "grad_norm": 1.6139039993286133,
+ "learning_rate": 4.815695179827502e-05,
+ "loss": 1.9403,
+ "step": 1320
+ },
+ {
+ "epoch": 0.6279508970727101,
+ "grad_norm": 1.429937481880188,
+ "learning_rate": 4.812884860964086e-05,
+ "loss": 1.8925,
+ "step": 1330
+ },
+ {
+ "epoch": 0.6326723323890463,
+ "grad_norm": 1.4869327545166016,
+ "learning_rate": 4.8100541104495355e-05,
+ "loss": 1.8849,
+ "step": 1340
+ },
+ {
+ "epoch": 0.6373937677053825,
+ "grad_norm": 1.32284677028656,
+ "learning_rate": 4.8072029532902426e-05,
+ "loss": 1.8733,
+ "step": 1350
+ },
+ {
+ "epoch": 0.6421152030217187,
+ "grad_norm": 1.659633994102478,
+ "learning_rate": 4.8043314146728705e-05,
+ "loss": 1.9357,
+ "step": 1360
+ },
+ {
+ "epoch": 0.6468366383380547,
+ "grad_norm": 1.524124264717102,
+ "learning_rate": 4.8014395199641246e-05,
+ "loss": 1.7913,
+ "step": 1370
+ },
+ {
+ "epoch": 0.6515580736543909,
+ "grad_norm": 1.294195294380188,
+ "learning_rate": 4.798527294710538e-05,
+ "loss": 1.9151,
+ "step": 1380
+ },
+ {
+ "epoch": 0.6562795089707271,
+ "grad_norm": 1.8346223831176758,
+ "learning_rate": 4.795594764638237e-05,
+ "loss": 1.9297,
+ "step": 1390
+ },
+ {
+ "epoch": 0.6610009442870632,
+ "grad_norm": 1.457963466644287,
+ "learning_rate": 4.792641955652718e-05,
+ "loss": 1.9032,
+ "step": 1400
+ },
+ {
+ "epoch": 0.6610009442870632,
+ "eval_loss": 1.914588212966919,
+ "eval_runtime": 41.1099,
+ "eval_samples_per_second": 21.698,
+ "eval_steps_per_second": 2.724,
+ "step": 1400
+ },
+ {
+ "epoch": 0.6657223796033994,
+ "grad_norm": 1.5672495365142822,
+ "learning_rate": 4.7896688938386195e-05,
+ "loss": 1.9032,
+ "step": 1410
+ },
+ {
+ "epoch": 0.6704438149197356,
+ "grad_norm": 1.5081193447113037,
+ "learning_rate": 4.786675605459487e-05,
+ "loss": 1.8854,
+ "step": 1420
+ },
+ {
+ "epoch": 0.6751652502360718,
+ "grad_norm": 1.450073003768921,
+ "learning_rate": 4.7836621169575494e-05,
+ "loss": 1.8865,
+ "step": 1430
+ },
+ {
+ "epoch": 0.6798866855524079,
+ "grad_norm": 1.4455468654632568,
+ "learning_rate": 4.7806284549534755e-05,
+ "loss": 1.7515,
+ "step": 1440
+ },
+ {
+ "epoch": 0.6846081208687441,
+ "grad_norm": 1.7873055934906006,
+ "learning_rate": 4.7775746462461446e-05,
+ "loss": 1.9624,
+ "step": 1450
+ },
+ {
+ "epoch": 0.6893295561850803,
+ "grad_norm": 1.5174776315689087,
+ "learning_rate": 4.7745007178124114e-05,
+ "loss": 1.7875,
+ "step": 1460
+ },
+ {
+ "epoch": 0.6940509915014165,
+ "grad_norm": 1.6502797603607178,
+ "learning_rate": 4.771406696806861e-05,
+ "loss": 1.8984,
+ "step": 1470
+ },
+ {
+ "epoch": 0.6987724268177526,
+ "grad_norm": 1.3501724004745483,
+ "learning_rate": 4.7682926105615754e-05,
+ "loss": 1.8594,
+ "step": 1480
+ },
+ {
+ "epoch": 0.7034938621340887,
+ "grad_norm": 1.442497730255127,
+ "learning_rate": 4.76515848658589e-05,
+ "loss": 1.8484,
+ "step": 1490
+ },
+ {
+ "epoch": 0.7082152974504249,
+ "grad_norm": 1.346633791923523,
+ "learning_rate": 4.76200435256615e-05,
+ "loss": 1.9295,
+ "step": 1500
+ },
+ {
+ "epoch": 0.7082152974504249,
+ "eval_loss": 1.9108749628067017,
+ "eval_runtime": 41.131,
+ "eval_samples_per_second": 21.687,
+ "eval_steps_per_second": 2.723,
+ "step": 1500
+ },
+ {
+ "epoch": 0.7129367327667611,
+ "grad_norm": 1.684790849685669,
+ "learning_rate": 4.758830236365465e-05,
+ "loss": 1.8586,
+ "step": 1510
+ },
+ {
+ "epoch": 0.7176581680830972,
+ "grad_norm": 1.508821725845337,
+ "learning_rate": 4.7556361660234634e-05,
+ "loss": 1.9794,
+ "step": 1520
+ },
+ {
+ "epoch": 0.7223796033994334,
+ "grad_norm": 1.526046872138977,
+ "learning_rate": 4.752422169756048e-05,
+ "loss": 1.9122,
+ "step": 1530
+ },
+ {
+ "epoch": 0.7271010387157696,
+ "grad_norm": 2.1538867950439453,
+ "learning_rate": 4.749188275955143e-05,
+ "loss": 1.9197,
+ "step": 1540
+ },
+ {
+ "epoch": 0.7318224740321058,
+ "grad_norm": 1.4204916954040527,
+ "learning_rate": 4.745934513188442e-05,
+ "loss": 1.8548,
+ "step": 1550
+ },
+ {
+ "epoch": 0.7365439093484419,
+ "grad_norm": 1.319259524345398,
+ "learning_rate": 4.7426609101991605e-05,
+ "loss": 1.8857,
+ "step": 1560
+ },
+ {
+ "epoch": 0.7412653446647781,
+ "grad_norm": 1.351597785949707,
+ "learning_rate": 4.739367495905778e-05,
+ "loss": 1.876,
+ "step": 1570
+ },
+ {
+ "epoch": 0.7459867799811143,
+ "grad_norm": 1.51447331905365,
+ "learning_rate": 4.736054299401785e-05,
+ "loss": 1.9355,
+ "step": 1580
+ },
+ {
+ "epoch": 0.7507082152974505,
+ "grad_norm": 1.2614985704421997,
+ "learning_rate": 4.7327213499554234e-05,
+ "loss": 1.9286,
+ "step": 1590
+ },
+ {
+ "epoch": 0.7554296506137866,
+ "grad_norm": 1.0692663192749023,
+ "learning_rate": 4.7293686770094294e-05,
+ "loss": 1.8207,
+ "step": 1600
+ },
+ {
+ "epoch": 0.7554296506137866,
+ "eval_loss": 1.906082034111023,
+ "eval_runtime": 41.0972,
+ "eval_samples_per_second": 21.705,
+ "eval_steps_per_second": 2.725,
+ "step": 1600
+ },
+ {
+ "epoch": 0.7601510859301227,
+ "grad_norm": 1.4698175191879272,
+ "learning_rate": 4.725996310180776e-05,
+ "loss": 1.9245,
+ "step": 1610
+ },
+ {
+ "epoch": 0.7648725212464589,
+ "grad_norm": 1.2762446403503418,
+ "learning_rate": 4.7226042792604046e-05,
+ "loss": 1.8556,
+ "step": 1620
+ },
+ {
+ "epoch": 0.7695939565627951,
+ "grad_norm": 1.7315044403076172,
+ "learning_rate": 4.719192614212969e-05,
+ "loss": 1.9757,
+ "step": 1630
+ },
+ {
+ "epoch": 0.7743153918791312,
+ "grad_norm": 1.228724479675293,
+ "learning_rate": 4.7157613451765686e-05,
+ "loss": 2.0371,
+ "step": 1640
+ },
+ {
+ "epoch": 0.7790368271954674,
+ "grad_norm": 1.7622945308685303,
+ "learning_rate": 4.7123105024624776e-05,
+ "loss": 1.8646,
+ "step": 1650
+ },
+ {
+ "epoch": 0.7837582625118036,
+ "grad_norm": 1.4164314270019531,
+ "learning_rate": 4.708840116554883e-05,
+ "loss": 1.8383,
+ "step": 1660
+ },
+ {
+ "epoch": 0.7884796978281398,
+ "grad_norm": 1.8074839115142822,
+ "learning_rate": 4.7053502181106145e-05,
+ "loss": 1.9008,
+ "step": 1670
+ },
+ {
+ "epoch": 0.7932011331444759,
+ "grad_norm": 1.5293446779251099,
+ "learning_rate": 4.70184083795887e-05,
+ "loss": 1.8378,
+ "step": 1680
+ },
+ {
+ "epoch": 0.7979225684608121,
+ "grad_norm": 1.3560067415237427,
+ "learning_rate": 4.698312007100947e-05,
+ "loss": 1.8468,
+ "step": 1690
+ },
+ {
+ "epoch": 0.8026440037771483,
+ "grad_norm": 1.6982842683792114,
+ "learning_rate": 4.694763756709967e-05,
+ "loss": 1.9119,
+ "step": 1700
+ },
+ {
+ "epoch": 0.8026440037771483,
+ "eval_loss": 1.9032281637191772,
+ "eval_runtime": 41.2928,
+ "eval_samples_per_second": 21.602,
+ "eval_steps_per_second": 2.712,
+ "step": 1700
+ },
+ {
+ "epoch": 0.8073654390934845,
+ "grad_norm": 1.5927674770355225,
+ "learning_rate": 4.691196118130601e-05,
+ "loss": 1.9081,
+ "step": 1710
+ },
+ {
+ "epoch": 0.8120868744098206,
+ "grad_norm": 1.4806030988693237,
+ "learning_rate": 4.687609122878791e-05,
+ "loss": 1.8604,
+ "step": 1720
+ },
+ {
+ "epoch": 0.8168083097261567,
+ "grad_norm": 1.5093107223510742,
+ "learning_rate": 4.6840028026414745e-05,
+ "loss": 1.7843,
+ "step": 1730
+ },
+ {
+ "epoch": 0.8215297450424929,
+ "grad_norm": 1.2747288942337036,
+ "learning_rate": 4.6803771892763004e-05,
+ "loss": 1.8666,
+ "step": 1740
+ },
+ {
+ "epoch": 0.826251180358829,
+ "grad_norm": 1.614396333694458,
+ "learning_rate": 4.676732314811353e-05,
+ "loss": 1.8538,
+ "step": 1750
+ },
+ {
+ "epoch": 0.8309726156751652,
+ "grad_norm": 1.3834142684936523,
+ "learning_rate": 4.673068211444862e-05,
+ "loss": 1.8492,
+ "step": 1760
+ },
+ {
+ "epoch": 0.8356940509915014,
+ "grad_norm": 1.6884135007858276,
+ "learning_rate": 4.669384911544927e-05,
+ "loss": 1.8554,
+ "step": 1770
+ },
+ {
+ "epoch": 0.8404154863078376,
+ "grad_norm": 1.3983336687088013,
+ "learning_rate": 4.665682447649222e-05,
+ "loss": 1.9333,
+ "step": 1780
+ },
+ {
+ "epoch": 0.8451369216241738,
+ "grad_norm": 1.45684015750885,
+ "learning_rate": 4.661960852464717e-05,
+ "loss": 1.7886,
+ "step": 1790
+ },
+ {
+ "epoch": 0.8498583569405099,
+ "grad_norm": 1.4929298162460327,
+ "learning_rate": 4.6582201588673816e-05,
+ "loss": 1.8392,
+ "step": 1800
+ },
+ {
+ "epoch": 0.8498583569405099,
+ "eval_loss": 1.9019125699996948,
+ "eval_runtime": 41.2022,
+ "eval_samples_per_second": 21.649,
+ "eval_steps_per_second": 2.718,
+ "step": 1800
+ },
+ {
+ "epoch": 0.8545797922568461,
+ "grad_norm": 1.9542425870895386,
+ "learning_rate": 4.6544603999018966e-05,
+ "loss": 1.8178,
+ "step": 1810
+ },
+ {
+ "epoch": 0.8593012275731823,
+ "grad_norm": 1.7367998361587524,
+ "learning_rate": 4.6506816087813685e-05,
+ "loss": 1.9773,
+ "step": 1820
+ },
+ {
+ "epoch": 0.8640226628895185,
+ "grad_norm": 1.410551905632019,
+ "learning_rate": 4.646883818887025e-05,
+ "loss": 1.7574,
+ "step": 1830
+ },
+ {
+ "epoch": 0.8687440982058546,
+ "grad_norm": 1.2788314819335938,
+ "learning_rate": 4.6430670637679295e-05,
+ "loss": 1.9249,
+ "step": 1840
+ },
+ {
+ "epoch": 0.8734655335221907,
+ "grad_norm": 1.1406760215759277,
+ "learning_rate": 4.63923137714068e-05,
+ "loss": 1.8666,
+ "step": 1850
+ },
+ {
+ "epoch": 0.8781869688385269,
+ "grad_norm": 1.0523242950439453,
+ "learning_rate": 4.635376792889111e-05,
+ "loss": 1.8948,
+ "step": 1860
+ },
+ {
+ "epoch": 0.882908404154863,
+ "grad_norm": 1.2536702156066895,
+ "learning_rate": 4.6315033450639996e-05,
+ "loss": 1.8183,
+ "step": 1870
+ },
+ {
+ "epoch": 0.8876298394711992,
+ "grad_norm": 1.2993184328079224,
+ "learning_rate": 4.6276110678827555e-05,
+ "loss": 1.8918,
+ "step": 1880
+ },
+ {
+ "epoch": 0.8923512747875354,
+ "grad_norm": 1.475024700164795,
+ "learning_rate": 4.6236999957291275e-05,
+ "loss": 1.8303,
+ "step": 1890
+ },
+ {
+ "epoch": 0.8970727101038716,
+ "grad_norm": 1.4702178239822388,
+ "learning_rate": 4.619770163152896e-05,
+ "loss": 1.961,
+ "step": 1900
+ },
+ {
+ "epoch": 0.8970727101038716,
+ "eval_loss": 1.8994309902191162,
+ "eval_runtime": 41.0958,
+ "eval_samples_per_second": 21.705,
+ "eval_steps_per_second": 2.725,
+ "step": 1900
+ },
+ {
+ "epoch": 0.9017941454202077,
+ "grad_norm": 1.4287694692611694,
+ "learning_rate": 4.615821604869564e-05,
+ "loss": 1.8132,
+ "step": 1910
+ },
+ {
+ "epoch": 0.9065155807365439,
+ "grad_norm": 1.98855721950531,
+ "learning_rate": 4.61185435576006e-05,
+ "loss": 1.8418,
+ "step": 1920
+ },
+ {
+ "epoch": 0.9112370160528801,
+ "grad_norm": 1.482932209968567,
+ "learning_rate": 4.607868450870421e-05,
+ "loss": 1.8774,
+ "step": 1930
+ },
+ {
+ "epoch": 0.9159584513692163,
+ "grad_norm": 1.6554712057113647,
+ "learning_rate": 4.6038639254114855e-05,
+ "loss": 1.9419,
+ "step": 1940
+ },
+ {
+ "epoch": 0.9206798866855525,
+ "grad_norm": 1.5708400011062622,
+ "learning_rate": 4.599840814758587e-05,
+ "loss": 2.0166,
+ "step": 1950
+ },
+ {
+ "epoch": 0.9254013220018886,
+ "grad_norm": 1.4725310802459717,
+ "learning_rate": 4.5957991544512316e-05,
+ "loss": 1.9425,
+ "step": 1960
+ },
+ {
+ "epoch": 0.9301227573182247,
+ "grad_norm": 1.569560170173645,
+ "learning_rate": 4.591738980192796e-05,
+ "loss": 1.8323,
+ "step": 1970
+ },
+ {
+ "epoch": 0.9348441926345609,
+ "grad_norm": 1.2551137208938599,
+ "learning_rate": 4.587660327850203e-05,
+ "loss": 1.8055,
+ "step": 1980
+ },
+ {
+ "epoch": 0.939565627950897,
+ "grad_norm": 1.6381155252456665,
+ "learning_rate": 4.583563233453607e-05,
+ "loss": 1.7541,
+ "step": 1990
+ },
+ {
+ "epoch": 0.9442870632672332,
+ "grad_norm": 1.6381361484527588,
+ "learning_rate": 4.579447733196079e-05,
+ "loss": 1.8913,
+ "step": 2000
+ },
+ {
+ "epoch": 0.9442870632672332,
+ "eval_loss": 1.8945337533950806,
+ "eval_runtime": 41.0799,
+ "eval_samples_per_second": 21.714,
+ "eval_steps_per_second": 2.726,
+ "step": 2000
+ },
+ {
+ "epoch": 0.9490084985835694,
+ "grad_norm": 1.4363571405410767,
+ "learning_rate": 4.5753138634332835e-05,
+ "loss": 1.8698,
+ "step": 2010
+ },
+ {
+ "epoch": 0.9537299338999056,
+ "grad_norm": 1.833264708518982,
+ "learning_rate": 4.5711616606831576e-05,
+ "loss": 1.893,
+ "step": 2020
+ },
+ {
+ "epoch": 0.9584513692162417,
+ "grad_norm": 1.4078890085220337,
+ "learning_rate": 4.566991161625589e-05,
+ "loss": 1.965,
+ "step": 2030
+ },
+ {
+ "epoch": 0.9631728045325779,
+ "grad_norm": 1.8140511512756348,
+ "learning_rate": 4.562802403102093e-05,
+ "loss": 1.7615,
+ "step": 2040
+ },
+ {
+ "epoch": 0.9678942398489141,
+ "grad_norm": 1.6720659732818604,
+ "learning_rate": 4.5585954221154856e-05,
+ "loss": 1.8079,
+ "step": 2050
+ },
+ {
+ "epoch": 0.9726156751652503,
+ "grad_norm": 1.3955872058868408,
+ "learning_rate": 4.554370255829558e-05,
+ "loss": 1.8026,
+ "step": 2060
+ },
+ {
+ "epoch": 0.9773371104815864,
+ "grad_norm": 1.525856375694275,
+ "learning_rate": 4.550126941568744e-05,
+ "loss": 1.8584,
+ "step": 2070
+ },
+ {
+ "epoch": 0.9820585457979226,
+ "grad_norm": 1.526258111000061,
+ "learning_rate": 4.5458655168177974e-05,
+ "loss": 1.7369,
+ "step": 2080
+ },
+ {
+ "epoch": 0.9867799811142587,
+ "grad_norm": 1.4565373659133911,
+ "learning_rate": 4.541586019221457e-05,
+ "loss": 1.8483,
+ "step": 2090
+ },
+ {
+ "epoch": 0.9915014164305949,
+ "grad_norm": 1.767482042312622,
+ "learning_rate": 4.5372884865841114e-05,
+ "loss": 1.8187,
+ "step": 2100
+ },
+ {
+ "epoch": 0.9915014164305949,
+ "eval_loss": 1.8941270112991333,
+ "eval_runtime": 41.105,
+ "eval_samples_per_second": 21.701,
+ "eval_steps_per_second": 2.725,
+ "step": 2100
+ },
+ {
+ "epoch": 0.996222851746931,
+ "grad_norm": 1.748547077178955,
+ "learning_rate": 4.532972956869471e-05,
+ "loss": 1.9258,
+ "step": 2110
+ },
+ {
+ "epoch": 1.0009442870632672,
+ "grad_norm": 1.0939620733261108,
+ "learning_rate": 4.528639468200226e-05,
+ "loss": 1.8346,
+ "step": 2120
+ },
+ {
+ "epoch": 1.0056657223796035,
+ "grad_norm": 1.4557725191116333,
+ "learning_rate": 4.524288058857717e-05,
+ "loss": 1.7275,
+ "step": 2130
+ },
+ {
+ "epoch": 1.0103871576959396,
+ "grad_norm": 1.2835229635238647,
+ "learning_rate": 4.51991876728159e-05,
+ "loss": 1.6719,
+ "step": 2140
+ },
+ {
+ "epoch": 1.0151085930122756,
+ "grad_norm": 1.5596965551376343,
+ "learning_rate": 4.515531632069461e-05,
+ "loss": 1.8067,
+ "step": 2150
+ },
+ {
+ "epoch": 1.019830028328612,
+ "grad_norm": 1.5260546207427979,
+ "learning_rate": 4.511126691976574e-05,
+ "loss": 1.7311,
+ "step": 2160
+ },
+ {
+ "epoch": 1.024551463644948,
+ "grad_norm": 2.1776397228240967,
+ "learning_rate": 4.506703985915457e-05,
+ "loss": 1.8171,
+ "step": 2170
+ },
+ {
+ "epoch": 1.0292728989612843,
+ "grad_norm": 1.8992706537246704,
+ "learning_rate": 4.502263552955581e-05,
+ "loss": 1.6716,
+ "step": 2180
+ },
+ {
+ "epoch": 1.0339943342776203,
+ "grad_norm": 1.9256116151809692,
+ "learning_rate": 4.497805432323015e-05,
+ "loss": 1.7456,
+ "step": 2190
+ },
+ {
+ "epoch": 1.0387157695939566,
+ "grad_norm": 1.5675586462020874,
+ "learning_rate": 4.4933296634000734e-05,
+ "loss": 1.7296,
+ "step": 2200
+ },
+ {
+ "epoch": 1.0387157695939566,
+ "eval_loss": 1.9005860090255737,
+ "eval_runtime": 41.0876,
+ "eval_samples_per_second": 21.71,
+ "eval_steps_per_second": 2.726,
+ "step": 2200
+ },
+ {
+ "epoch": 1.0434372049102927,
+ "grad_norm": 1.6989753246307373,
+ "learning_rate": 4.4888362857249775e-05,
+ "loss": 1.7202,
+ "step": 2210
+ },
+ {
+ "epoch": 1.048158640226629,
+ "grad_norm": 1.9172133207321167,
+ "learning_rate": 4.484325338991499e-05,
+ "loss": 1.7879,
+ "step": 2220
+ },
+ {
+ "epoch": 1.052880075542965,
+ "grad_norm": 2.1512560844421387,
+ "learning_rate": 4.4797968630486135e-05,
+ "loss": 1.7202,
+ "step": 2230
+ },
+ {
+ "epoch": 1.0576015108593013,
+ "grad_norm": 2.3440024852752686,
+ "learning_rate": 4.475250897900144e-05,
+ "loss": 1.6839,
+ "step": 2240
+ },
+ {
+ "epoch": 1.0623229461756374,
+ "grad_norm": 1.758452296257019,
+ "learning_rate": 4.470687483704413e-05,
+ "loss": 1.7942,
+ "step": 2250
+ },
+ {
+ "epoch": 1.0670443814919737,
+ "grad_norm": 1.96663498878479,
+ "learning_rate": 4.466106660773885e-05,
+ "loss": 1.7255,
+ "step": 2260
+ },
+ {
+ "epoch": 1.0717658168083097,
+ "grad_norm": 2.048264980316162,
+ "learning_rate": 4.4615084695748074e-05,
+ "loss": 1.6271,
+ "step": 2270
+ },
+ {
+ "epoch": 1.0764872521246458,
+ "grad_norm": 1.7712353467941284,
+ "learning_rate": 4.456892950726861e-05,
+ "loss": 1.7444,
+ "step": 2280
+ },
+ {
+ "epoch": 1.081208687440982,
+ "grad_norm": 2.1486504077911377,
+ "learning_rate": 4.452260145002791e-05,
+ "loss": 1.7943,
+ "step": 2290
+ },
+ {
+ "epoch": 1.0859301227573182,
+ "grad_norm": 1.917653203010559,
+ "learning_rate": 4.447610093328056e-05,
+ "loss": 1.6184,
+ "step": 2300
+ },
+ {
+ "epoch": 1.0859301227573182,
+ "eval_loss": 1.9040113687515259,
+ "eval_runtime": 41.1418,
+ "eval_samples_per_second": 21.681,
+ "eval_steps_per_second": 2.722,
+ "step": 2300
+ },
+ {
+ "epoch": 1.0906515580736544,
+ "grad_norm": 2.010401725769043,
+ "learning_rate": 4.4429428367804605e-05,
+ "loss": 1.7606,
+ "step": 2310
+ },
+ {
+ "epoch": 1.0953729933899905,
+ "grad_norm": 1.863293170928955,
+ "learning_rate": 4.438258416589794e-05,
+ "loss": 1.7419,
+ "step": 2320
+ },
+ {
+ "epoch": 1.1000944287063268,
+ "grad_norm": 2.40513014793396,
+ "learning_rate": 4.43355687413747e-05,
+ "loss": 1.6251,
+ "step": 2330
+ },
+ {
+ "epoch": 1.1048158640226629,
+ "grad_norm": 1.9660564661026,
+ "learning_rate": 4.428838250956153e-05,
+ "loss": 1.7576,
+ "step": 2340
+ },
+ {
+ "epoch": 1.1095372993389991,
+ "grad_norm": 1.7968907356262207,
+ "learning_rate": 4.4241025887293976e-05,
+ "loss": 1.758,
+ "step": 2350
+ },
+ {
+ "epoch": 1.1142587346553352,
+ "grad_norm": 2.0417661666870117,
+ "learning_rate": 4.419349929291279e-05,
+ "loss": 1.759,
+ "step": 2360
+ },
+ {
+ "epoch": 1.1189801699716715,
+ "grad_norm": 1.7925529479980469,
+ "learning_rate": 4.414580314626023e-05,
+ "loss": 1.7568,
+ "step": 2370
+ },
+ {
+ "epoch": 1.1237016052880076,
+ "grad_norm": 2.122156858444214,
+ "learning_rate": 4.4097937868676345e-05,
+ "loss": 1.684,
+ "step": 2380
+ },
+ {
+ "epoch": 1.1284230406043436,
+ "grad_norm": 2.248425245285034,
+ "learning_rate": 4.404990388299527e-05,
+ "loss": 1.6816,
+ "step": 2390
+ },
+ {
+ "epoch": 1.13314447592068,
+ "grad_norm": 2.147604465484619,
+ "learning_rate": 4.4001701613541456e-05,
+ "loss": 1.6973,
+ "step": 2400
+ },
+ {
+ "epoch": 1.13314447592068,
+ "eval_loss": 1.9056047201156616,
+ "eval_runtime": 41.0721,
+ "eval_samples_per_second": 21.718,
+ "eval_steps_per_second": 2.727,
+ "step": 2400
+ },
+ {
+ "epoch": 1.13314447592068,
+ "step": 2400,
+ "total_flos": 9.512959383227597e+17,
+ "train_loss": 1.9100826263427735,
+ "train_runtime": 3885.2685,
+ "train_samples_per_second": 21.805,
+ "train_steps_per_second": 2.726
+ }
+ ],
+ "logging_steps": 10,
+ "max_steps": 10590,
+ "num_input_tokens_seen": 0,
+ "num_train_epochs": 5,
+ "save_steps": 100,
+ "total_flos": 9.512959383227597e+17,
+ "train_batch_size": 4,
+ "trial_name": null,
+ "trial_params": null
+}
diff --git a/llama2_13b_peft/topical_chat/training_args.bin b/llama2_13b_peft/topical_chat/training_args.bin
new file mode 100644
index 0000000000000000000000000000000000000000..95709355e0c3be0b2bd443d488fc6d61cc771cfe
--- /dev/null
+++ b/llama2_13b_peft/topical_chat/training_args.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:4f7494ca477a770eb8a9f51a202a612e05a71973237d5bb7ae54dd4d1ec4b49d
+size 5176
diff --git a/llama2_13b_peft/topical_chat/training_eval_loss.png b/llama2_13b_peft/topical_chat/training_eval_loss.png
new file mode 100644
index 0000000000000000000000000000000000000000..aa9bbfc89949f983e02433500c38643c1a322e75
Binary files /dev/null and b/llama2_13b_peft/topical_chat/training_eval_loss.png differ
diff --git a/llama2_13b_peft/topical_chat/training_loss.png b/llama2_13b_peft/topical_chat/training_loss.png
new file mode 100644
index 0000000000000000000000000000000000000000..50b461e743f86e7e4a795241623d7ccaec963516
Binary files /dev/null and b/llama2_13b_peft/topical_chat/training_loss.png differ
diff --git a/llama2_13b_peft/unit_conversion/README.md b/llama2_13b_peft/unit_conversion/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..46b96e39eb696192311be74af2fb95e2f55130a8
--- /dev/null
+++ b/llama2_13b_peft/unit_conversion/README.md
@@ -0,0 +1,77 @@
+---
+license: other
+library_name: peft
+tags:
+- llama-factory
+- lora
+- generated_from_trainer
+base_model: /data1/model/llama2/meta-llama/Llama2-13b
+model-index:
+- name: unit_conversion_no_sys
+ results: []
+---
+
+
+
+# unit_conversion_no_sys
+
+This model is a fine-tuned version of Llama 2 13B, loaded from the local checkpoint path `/data1/model/llama2/meta-llama/Llama2-13b`, on the unit_conversion_no_sys dataset.
+It achieves the following results on the evaluation set:
+- Loss: 0.3370
+
+## Model description
+
+More information needed
+
+## Intended uses & limitations
+
+More information needed
+
+## Training and evaluation data
+
+More information needed
+
+## Training procedure
+
+### Training hyperparameters
+
+The following hyperparameters were used during training:
+- learning_rate: 1e-05
+- train_batch_size: 16
+- eval_batch_size: 16
+- seed: 42
+- distributed_type: multi-GPU
+- num_devices: 2
+- total_train_batch_size: 32
+- total_eval_batch_size: 32
+- optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
+- lr_scheduler_type: cosine
+- lr_scheduler_warmup_steps: 20
+- num_epochs: 5.0
+
+### Training results
+
+| Training Loss | Epoch | Step | Validation Loss |
+|:-------------:|:-----:|:----:|:---------------:|
+| 0.5422 | 0.39 | 200 | 0.4792 |
+| 0.319 | 0.79 | 400 | 0.4168 |
+| 0.3583 | 1.18 | 600 | 0.3873 |
+| 0.3048 | 1.57 | 800 | 0.3692 |
+| 0.4185 | 1.96 | 1000 | 0.3550 |
+| 0.3737 | 2.36 | 1200 | 0.3487 |
+| 0.2418 | 2.75 | 1400 | 0.3422 |
+| 0.2528 | 3.14 | 1600 | 0.3390 |
+| 0.3192 | 3.54 | 1800 | 0.3393 |
+| 0.2834 | 3.93 | 2000 | 0.3370 |
+| 0.3612 | 4.32 | 2200 | 0.3375 |
+| 0.2732 | 4.72 | 2400 | 0.3369 |
+
+
+### Framework versions
+
+- PEFT 0.9.0
+- Transformers 4.38.2
+- Pytorch 2.2.1
+- Datasets 2.18.0
+- Tokenizers 0.15.2
\ No newline at end of file
diff --git a/llama2_13b_peft/unit_conversion/adapter_config.json b/llama2_13b_peft/unit_conversion/adapter_config.json
new file mode 100644
index 0000000000000000000000000000000000000000..01231b454b0227fa8e9b0ad973e7c2a10d64504a
--- /dev/null
+++ b/llama2_13b_peft/unit_conversion/adapter_config.json
@@ -0,0 +1,33 @@
+{
+ "alpha_pattern": {},
+ "auto_mapping": null,
+ "base_model_name_or_path": "/data1/model/llama2/meta-llama/Llama2-13b",
+ "bias": "none",
+ "fan_in_fan_out": false,
+ "inference_mode": true,
+ "init_lora_weights": true,
+ "layers_pattern": null,
+ "layers_to_transform": null,
+ "loftq_config": {},
+ "lora_alpha": 16,
+ "lora_dropout": 0.0,
+ "megatron_config": null,
+ "megatron_core": "megatron.core",
+ "modules_to_save": null,
+ "peft_type": "LORA",
+ "r": 8,
+ "rank_pattern": {},
+ "revision": null,
+ "target_modules": [
+ "k_proj",
+ "q_proj",
+ "o_proj",
+ "gate_proj",
+ "v_proj",
+ "down_proj",
+ "up_proj"
+ ],
+ "task_type": "CAUSAL_LM",
+ "use_dora": false,
+ "use_rslora": false
+}
\ No newline at end of file
diff --git a/llama2_13b_peft/unit_conversion/adapter_model.safetensors b/llama2_13b_peft/unit_conversion/adapter_model.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..2e8902fdfb6542b7f86a5ced874aeae21b553474
--- /dev/null
+++ b/llama2_13b_peft/unit_conversion/adapter_model.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:1e20cbcb46e97e5f6e3e97eae69742c437853adbc601fc782fb3715d6daabd97
+size 125248064
diff --git a/llama2_13b_peft/unit_conversion/all_results.json b/llama2_13b_peft/unit_conversion/all_results.json
new file mode 100644
index 0000000000000000000000000000000000000000..10a1fa634782a325b03e268c73b4254dd7d0462f
--- /dev/null
+++ b/llama2_13b_peft/unit_conversion/all_results.json
@@ -0,0 +1,11 @@
+{
+ "epoch": 5.0,
+ "eval_loss": 0.3370112180709839,
+ "eval_runtime": 39.0124,
+ "eval_samples_per_second": 73.643,
+ "eval_steps_per_second": 2.307,
+ "train_loss": 0.3501229747105207,
+ "train_runtime": 3965.184,
+ "train_samples_per_second": 20.526,
+ "train_steps_per_second": 0.642
+}
\ No newline at end of file
diff --git a/llama2_13b_peft/unit_conversion/eval_results.json b/llama2_13b_peft/unit_conversion/eval_results.json
new file mode 100644
index 0000000000000000000000000000000000000000..3026f9de0874f22c6d76caab881b00904d300706
--- /dev/null
+++ b/llama2_13b_peft/unit_conversion/eval_results.json
@@ -0,0 +1,7 @@
+{
+ "epoch": 5.0,
+ "eval_loss": 0.3370112180709839,
+ "eval_runtime": 39.0124,
+ "eval_samples_per_second": 73.643,
+ "eval_steps_per_second": 2.307
+}
\ No newline at end of file
diff --git a/llama2_13b_peft/unit_conversion/special_tokens_map.json b/llama2_13b_peft/unit_conversion/special_tokens_map.json
new file mode 100644
index 0000000000000000000000000000000000000000..72ecfeeb7e14d244c936169d2ed139eeae235ef1
--- /dev/null
+++ b/llama2_13b_peft/unit_conversion/special_tokens_map.json
@@ -0,0 +1,24 @@
+{
+ "bos_token": {
+ "content": "<s>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false
+ },
+ "eos_token": {
+ "content": "</s>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false
+ },
+ "pad_token": "",
+ "unk_token": {
+ "content": "<unk>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false
+ }
+}
diff --git a/llama2_13b_peft/unit_conversion/tokenizer.model b/llama2_13b_peft/unit_conversion/tokenizer.model
new file mode 100644
index 0000000000000000000000000000000000000000..6c00c742ce03c627d6cd5b795984876fa49fa899
--- /dev/null
+++ b/llama2_13b_peft/unit_conversion/tokenizer.model
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9e556afd44213b6bd1be2b850ebbbd98f5481437a8021afaf58ee7fb1818d347
+size 499723
diff --git a/llama2_13b_peft/unit_conversion/tokenizer_config.json b/llama2_13b_peft/unit_conversion/tokenizer_config.json
new file mode 100644
index 0000000000000000000000000000000000000000..a40266f39e5b5fed14de34710d35eb9e98d6bdad
--- /dev/null
+++ b/llama2_13b_peft/unit_conversion/tokenizer_config.json
@@ -0,0 +1,45 @@
+{
+ "add_bos_token": true,
+ "add_eos_token": false,
+ "add_prefix_space": true,
+ "added_tokens_decoder": {
+ "0": {
+ "content": "<unk>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "1": {
+ "content": "<s>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "2": {
+ "content": "</s>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ }
+ },
+ "bos_token": "<s>",
+ "chat_template": "{% if messages[0]['role'] == 'system' %}{% set system_message = messages[0]['content'] %}{% endif %}{% if system_message is defined %}{{ system_message }}{% endif %}{% for message in messages %}{% set content = message['content'] %}{% if message['role'] == 'user' %}{{ content }}{% elif message['role'] == 'assistant' %}{{ content + '\\n' }}{% endif %}{% endfor %}",
+ "clean_up_tokenization_spaces": false,
+ "eos_token": "</s>",
+ "legacy": true,
+ "model_max_length": 1000000000000000019884624838656,
+ "pad_token": "",
+ "padding_side": "right",
+ "sp_model_kwargs": {},
+ "spaces_between_special_tokens": false,
+ "split_special_tokens": false,
+ "tokenizer_class": "LlamaTokenizer",
+ "unk_token": "<unk>",
+ "use_default_system_prompt": false
+}
diff --git a/llama2_13b_peft/unit_conversion/train_results.json b/llama2_13b_peft/unit_conversion/train_results.json
new file mode 100644
index 0000000000000000000000000000000000000000..74b74ee7b9290e812e3805228f50b477e12286d6
--- /dev/null
+++ b/llama2_13b_peft/unit_conversion/train_results.json
@@ -0,0 +1,7 @@
+{
+ "epoch": 5.0,
+ "train_loss": 0.3501229747105207,
+ "train_runtime": 3965.184,
+ "train_samples_per_second": 20.526,
+ "train_steps_per_second": 0.642
+}
\ No newline at end of file
diff --git a/llama2_13b_peft/unit_conversion/trainer_log.jsonl b/llama2_13b_peft/unit_conversion/trainer_log.jsonl
new file mode 100644
index 0000000000000000000000000000000000000000..d2fe40a29321770c193fe31d8f98184fc16f85df
--- /dev/null
+++ b/llama2_13b_peft/unit_conversion/trainer_log.jsonl
@@ -0,0 +1,268 @@
+{"current_steps": 10, "total_steps": 2545, "loss": 1.6608, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 5e-06, "epoch": 0.02, "percentage": 0.39, "elapsed_time": "0:00:17", "remaining_time": "1:12:00"}
+{"current_steps": 20, "total_steps": 2545, "loss": 1.7201, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1e-05, "epoch": 0.04, "percentage": 0.79, "elapsed_time": "0:00:30", "remaining_time": "1:04:46"}
+{"current_steps": 30, "total_steps": 2545, "loss": 1.6975, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.99961299962427e-06, "epoch": 0.06, "percentage": 1.18, "elapsed_time": "0:00:46", "remaining_time": "1:04:46"}
+{"current_steps": 40, "total_steps": 2545, "loss": 1.3794, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.998452058404793e-06, "epoch": 0.08, "percentage": 1.57, "elapsed_time": "0:00:58", "remaining_time": "1:00:42"}
+{"current_steps": 50, "total_steps": 2545, "loss": 1.0864, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.996517356055444e-06, "epoch": 0.1, "percentage": 1.96, "elapsed_time": "0:01:11", "remaining_time": "0:59:07"}
+{"current_steps": 60, "total_steps": 2545, "loss": 0.6983, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.99380919206844e-06, "epoch": 0.12, "percentage": 2.36, "elapsed_time": "0:01:25", "remaining_time": "0:59:04"}
+{"current_steps": 70, "total_steps": 2545, "loss": 0.564, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.990327985667972e-06, "epoch": 0.14, "percentage": 2.75, "elapsed_time": "0:01:38", "remaining_time": "0:57:57"}
+{"current_steps": 80, "total_steps": 2545, "loss": 0.5864, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.986074275745314e-06, "epoch": 0.16, "percentage": 3.14, "elapsed_time": "0:01:50", "remaining_time": "0:56:48"}
+{"current_steps": 90, "total_steps": 2545, "loss": 0.4707, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.981048720775401e-06, "epoch": 0.18, "percentage": 3.54, "elapsed_time": "0:02:03", "remaining_time": "0:56:14"}
+{"current_steps": 100, "total_steps": 2545, "loss": 0.5636, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.975252098714898e-06, "epoch": 0.2, "percentage": 3.93, "elapsed_time": "0:02:16", "remaining_time": "0:55:34"}
+{"current_steps": 110, "total_steps": 2545, "loss": 0.4544, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.968685306881772e-06, "epoch": 0.22, "percentage": 4.32, "elapsed_time": "0:02:30", "remaining_time": "0:55:21"}
+{"current_steps": 120, "total_steps": 2545, "loss": 0.472, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.961349361816384e-06, "epoch": 0.24, "percentage": 4.72, "elapsed_time": "0:02:42", "remaining_time": "0:54:47"}
+{"current_steps": 130, "total_steps": 2545, "loss": 0.4864, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.953245399124133e-06, "epoch": 0.26, "percentage": 5.11, "elapsed_time": "0:03:01", "remaining_time": "0:56:07"}
+{"current_steps": 140, "total_steps": 2545, "loss": 0.4197, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.94437467329966e-06, "epoch": 0.28, "percentage": 5.5, "elapsed_time": "0:03:16", "remaining_time": "0:56:16"}
+{"current_steps": 150, "total_steps": 2545, "loss": 0.4712, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.934738557532663e-06, "epoch": 0.29, "percentage": 5.89, "elapsed_time": "0:03:30", "remaining_time": "0:55:54"}
+{"current_steps": 160, "total_steps": 2545, "loss": 0.3896, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.924338543495302e-06, "epoch": 0.31, "percentage": 6.29, "elapsed_time": "0:03:45", "remaining_time": "0:55:59"}
+{"current_steps": 170, "total_steps": 2545, "loss": 0.5073, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.91317624111132e-06, "epoch": 0.33, "percentage": 6.68, "elapsed_time": "0:03:57", "remaining_time": "0:55:19"}
+{"current_steps": 180, "total_steps": 2545, "loss": 0.4667, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.9012533783068e-06, "epoch": 0.35, "percentage": 7.07, "elapsed_time": "0:04:09", "remaining_time": "0:54:40"}
+{"current_steps": 190, "total_steps": 2545, "loss": 0.4281, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.888571800742694e-06, "epoch": 0.37, "percentage": 7.47, "elapsed_time": "0:04:24", "remaining_time": "0:54:42"}
+{"current_steps": 200, "total_steps": 2545, "loss": 0.5422, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.87513347152912e-06, "epoch": 0.39, "percentage": 7.86, "elapsed_time": "0:04:41", "remaining_time": "0:54:57"}
+{"current_steps": 200, "total_steps": 2545, "loss": null, "eval_loss": 0.47923311591148376, "predict_loss": null, "reward": null, "learning_rate": null, "epoch": 0.39, "percentage": 7.86, "elapsed_time": "0:04:41", "remaining_time": "0:54:57"}
+{"current_steps": 210, "total_steps": 2545, "loss": 0.4597, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.860940470921457e-06, "epoch": 0.41, "percentage": 8.25, "elapsed_time": "0:05:33", "remaining_time": "1:01:49"}
+{"current_steps": 220, "total_steps": 2545, "loss": 0.4876, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.845994995998332e-06, "epoch": 0.43, "percentage": 8.64, "elapsed_time": "0:05:44", "remaining_time": "1:00:39"}
+{"current_steps": 230, "total_steps": 2545, "loss": 0.5003, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.830299360321509e-06, "epoch": 0.45, "percentage": 9.04, "elapsed_time": "0:05:55", "remaining_time": "0:59:42"}
+{"current_steps": 240, "total_steps": 2545, "loss": 0.3836, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.813855993577753e-06, "epoch": 0.47, "percentage": 9.43, "elapsed_time": "0:06:11", "remaining_time": "0:59:24"}
+{"current_steps": 250, "total_steps": 2545, "loss": 0.4631, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.796667441202705e-06, "epoch": 0.49, "percentage": 9.82, "elapsed_time": "0:06:24", "remaining_time": "0:58:50"}
+{"current_steps": 260, "total_steps": 2545, "loss": 0.4101, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.778736363986854e-06, "epoch": 0.51, "percentage": 10.22, "elapsed_time": "0:06:36", "remaining_time": "0:58:01"}
+{"current_steps": 270, "total_steps": 2545, "loss": 0.4622, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.76006553766365e-06, "epoch": 0.53, "percentage": 10.61, "elapsed_time": "0:06:50", "remaining_time": "0:57:35"}
+{"current_steps": 280, "total_steps": 2545, "loss": 0.4378, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.740657852479815e-06, "epoch": 0.55, "percentage": 11.0, "elapsed_time": "0:07:05", "remaining_time": "0:57:20"}
+{"current_steps": 290, "total_steps": 2545, "loss": 0.3582, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.72051631274793e-06, "epoch": 0.57, "percentage": 11.39, "elapsed_time": "0:07:19", "remaining_time": "0:56:56"}
+{"current_steps": 300, "total_steps": 2545, "loss": 0.45, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.699644036381374e-06, "epoch": 0.59, "percentage": 11.79, "elapsed_time": "0:07:31", "remaining_time": "0:56:21"}
+{"current_steps": 310, "total_steps": 2545, "loss": 0.3289, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.678044254411665e-06, "epoch": 0.61, "percentage": 12.18, "elapsed_time": "0:07:46", "remaining_time": "0:55:59"}
+{"current_steps": 320, "total_steps": 2545, "loss": 0.3618, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.655720310488298e-06, "epoch": 0.63, "percentage": 12.57, "elapsed_time": "0:07:57", "remaining_time": "0:55:21"}
+{"current_steps": 330, "total_steps": 2545, "loss": 0.3579, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.632675660361148e-06, "epoch": 0.65, "percentage": 12.97, "elapsed_time": "0:08:11", "remaining_time": "0:55:02"}
+{"current_steps": 340, "total_steps": 2545, "loss": 0.4039, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.60891387134552e-06, "epoch": 0.67, "percentage": 13.36, "elapsed_time": "0:08:26", "remaining_time": "0:54:46"}
+{"current_steps": 350, "total_steps": 2545, "loss": 0.4668, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.58443862176992e-06, "epoch": 0.69, "percentage": 13.75, "elapsed_time": "0:08:40", "remaining_time": "0:54:26"}
+{"current_steps": 360, "total_steps": 2545, "loss": 0.4174, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.559253700406663e-06, "epoch": 0.71, "percentage": 14.15, "elapsed_time": "0:08:53", "remaining_time": "0:54:00"}
+{"current_steps": 370, "total_steps": 2545, "loss": 0.4081, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.533363005885362e-06, "epoch": 0.73, "percentage": 14.54, "elapsed_time": "0:09:07", "remaining_time": "0:53:37"}
+{"current_steps": 380, "total_steps": 2545, "loss": 0.3972, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.506770546089422e-06, "epoch": 0.75, "percentage": 14.93, "elapsed_time": "0:09:19", "remaining_time": "0:53:10"}
+{"current_steps": 390, "total_steps": 2545, "loss": 0.3379, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.479480437535613e-06, "epoch": 0.77, "percentage": 15.32, "elapsed_time": "0:09:36", "remaining_time": "0:53:03"}
+{"current_steps": 400, "total_steps": 2545, "loss": 0.319, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.45149690473684e-06, "epoch": 0.79, "percentage": 15.72, "elapsed_time": "0:09:47", "remaining_time": "0:52:29"}
+{"current_steps": 400, "total_steps": 2545, "loss": null, "eval_loss": 0.41680005192756653, "predict_loss": null, "reward": null, "learning_rate": null, "epoch": 0.79, "percentage": 15.72, "elapsed_time": "0:09:47", "remaining_time": "0:52:29"}
+{"current_steps": 410, "total_steps": 2545, "loss": 0.454, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.422824279548189e-06, "epoch": 0.81, "percentage": 16.11, "elapsed_time": "0:10:38", "remaining_time": "0:55:27"}
+{"current_steps": 420, "total_steps": 2545, "loss": 0.3599, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.393467000496345e-06, "epoch": 0.83, "percentage": 16.5, "elapsed_time": "0:10:53", "remaining_time": "0:55:07"}
+{"current_steps": 430, "total_steps": 2545, "loss": 0.3763, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.36342961209252e-06, "epoch": 0.84, "percentage": 16.9, "elapsed_time": "0:11:07", "remaining_time": "0:54:43"}
+{"current_steps": 440, "total_steps": 2545, "loss": 0.3931, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.332716764128952e-06, "epoch": 0.86, "percentage": 17.29, "elapsed_time": "0:11:22", "remaining_time": "0:54:22"}
+{"current_steps": 450, "total_steps": 2545, "loss": 0.3708, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.301333210959123e-06, "epoch": 0.88, "percentage": 17.68, "elapsed_time": "0:11:35", "remaining_time": "0:53:56"}
+{"current_steps": 460, "total_steps": 2545, "loss": 0.3863, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.26928381076178e-06, "epoch": 0.9, "percentage": 18.07, "elapsed_time": "0:11:48", "remaining_time": "0:53:32"}
+{"current_steps": 470, "total_steps": 2545, "loss": 0.3334, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.236573524788888e-06, "epoch": 0.92, "percentage": 18.47, "elapsed_time": "0:12:01", "remaining_time": "0:53:06"}
+{"current_steps": 480, "total_steps": 2545, "loss": 0.4054, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.203207416597635e-06, "epoch": 0.94, "percentage": 18.86, "elapsed_time": "0:12:15", "remaining_time": "0:52:46"}
+{"current_steps": 490, "total_steps": 2545, "loss": 0.3992, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.169190651266582e-06, "epoch": 0.96, "percentage": 19.25, "elapsed_time": "0:12:29", "remaining_time": "0:52:22"}
+{"current_steps": 500, "total_steps": 2545, "loss": 0.3113, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.134528494596116e-06, "epoch": 0.98, "percentage": 19.65, "elapsed_time": "0:12:42", "remaining_time": "0:51:57"}
+{"current_steps": 510, "total_steps": 2545, "loss": 0.4078, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.099226312293296e-06, "epoch": 1.0, "percentage": 20.04, "elapsed_time": "0:12:55", "remaining_time": "0:51:33"}
+{"current_steps": 520, "total_steps": 2545, "loss": 0.3958, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.063289569141251e-06, "epoch": 1.02, "percentage": 20.43, "elapsed_time": "0:13:09", "remaining_time": "0:51:15"}
+{"current_steps": 530, "total_steps": 2545, "loss": 0.3513, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.026723828153224e-06, "epoch": 1.04, "percentage": 20.83, "elapsed_time": "0:13:25", "remaining_time": "0:51:00"}
+{"current_steps": 540, "total_steps": 2545, "loss": 0.3857, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 8.98953474971141e-06, "epoch": 1.06, "percentage": 21.22, "elapsed_time": "0:13:38", "remaining_time": "0:50:37"}
+{"current_steps": 550, "total_steps": 2545, "loss": 0.3646, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 8.951728090690743e-06, "epoch": 1.08, "percentage": 21.61, "elapsed_time": "0:13:50", "remaining_time": "0:50:13"}
+{"current_steps": 560, "total_steps": 2545, "loss": 0.3052, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 8.913309703567722e-06, "epoch": 1.1, "percentage": 22.0, "elapsed_time": "0:14:06", "remaining_time": "0:50:00"}
+{"current_steps": 570, "total_steps": 2545, "loss": 0.3649, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 8.87428553551445e-06, "epoch": 1.12, "percentage": 22.4, "elapsed_time": "0:14:19", "remaining_time": "0:49:38"}
+{"current_steps": 580, "total_steps": 2545, "loss": 0.3371, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 8.834661627478003e-06, "epoch": 1.14, "percentage": 22.79, "elapsed_time": "0:14:32", "remaining_time": "0:49:17"}
+{"current_steps": 590, "total_steps": 2545, "loss": 0.346, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 8.794444113245302e-06, "epoch": 1.16, "percentage": 23.18, "elapsed_time": "0:14:45", "remaining_time": "0:48:52"}
+{"current_steps": 600, "total_steps": 2545, "loss": 0.3583, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 8.753639218493594e-06, "epoch": 1.18, "percentage": 23.58, "elapsed_time": "0:14:57", "remaining_time": "0:48:30"}
+{"current_steps": 600, "total_steps": 2545, "loss": null, "eval_loss": 0.38733917474746704, "predict_loss": null, "reward": null, "learning_rate": null, "epoch": 1.18, "percentage": 23.58, "elapsed_time": "0:14:57", "remaining_time": "0:48:30"}
+{"current_steps": 610, "total_steps": 2545, "loss": 0.3845, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 8.712253259826719e-06, "epoch": 1.2, "percentage": 23.97, "elapsed_time": "0:15:52", "remaining_time": "0:50:19"}
+{"current_steps": 620, "total_steps": 2545, "loss": 0.431, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 8.670292643797302e-06, "epoch": 1.22, "percentage": 24.36, "elapsed_time": "0:16:06", "remaining_time": "0:49:59"}
+{"current_steps": 630, "total_steps": 2545, "loss": 0.3512, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 8.627763865915005e-06, "epoch": 1.24, "percentage": 24.75, "elapsed_time": "0:16:20", "remaining_time": "0:49:38"}
+{"current_steps": 640, "total_steps": 2545, "loss": 0.3475, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 8.58467350964104e-06, "epoch": 1.26, "percentage": 25.15, "elapsed_time": "0:16:33", "remaining_time": "0:49:17"}
+{"current_steps": 650, "total_steps": 2545, "loss": 0.3372, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 8.541028245369033e-06, "epoch": 1.28, "percentage": 25.54, "elapsed_time": "0:16:48", "remaining_time": "0:49:00"}
+{"current_steps": 660, "total_steps": 2545, "loss": 0.3133, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 8.496834829392454e-06, "epoch": 1.3, "percentage": 25.93, "elapsed_time": "0:16:59", "remaining_time": "0:48:33"}
+{"current_steps": 670, "total_steps": 2545, "loss": 0.3274, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 8.452100102858734e-06, "epoch": 1.32, "percentage": 26.33, "elapsed_time": "0:17:16", "remaining_time": "0:48:19"}
+{"current_steps": 680, "total_steps": 2545, "loss": 0.3771, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 8.40683099071027e-06, "epoch": 1.34, "percentage": 26.72, "elapsed_time": "0:17:29", "remaining_time": "0:47:57"}
+{"current_steps": 690, "total_steps": 2545, "loss": 0.3636, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 8.361034500612421e-06, "epoch": 1.36, "percentage": 27.11, "elapsed_time": "0:17:45", "remaining_time": "0:47:44"}
+{"current_steps": 700, "total_steps": 2545, "loss": 0.3026, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 8.31471772186874e-06, "epoch": 1.38, "percentage": 27.5, "elapsed_time": "0:17:59", "remaining_time": "0:47:24"}
+{"current_steps": 710, "total_steps": 2545, "loss": 0.3075, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 8.267887824323537e-06, "epoch": 1.39, "percentage": 27.9, "elapsed_time": "0:18:13", "remaining_time": "0:47:05"}
+{"current_steps": 720, "total_steps": 2545, "loss": 0.2668, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 8.22055205725199e-06, "epoch": 1.41, "percentage": 28.29, "elapsed_time": "0:18:25", "remaining_time": "0:46:42"}
+{"current_steps": 730, "total_steps": 2545, "loss": 0.2809, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 8.172717748237955e-06, "epoch": 1.43, "percentage": 28.68, "elapsed_time": "0:18:40", "remaining_time": "0:46:25"}
+{"current_steps": 740, "total_steps": 2545, "loss": 0.2968, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 8.124392302039658e-06, "epoch": 1.45, "percentage": 29.08, "elapsed_time": "0:18:54", "remaining_time": "0:46:07"}
+{"current_steps": 750, "total_steps": 2545, "loss": 0.3148, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 8.075583199443431e-06, "epoch": 1.47, "percentage": 29.47, "elapsed_time": "0:19:06", "remaining_time": "0:45:44"}
+{"current_steps": 760, "total_steps": 2545, "loss": 0.3266, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 8.026297996105694e-06, "epoch": 1.49, "percentage": 29.86, "elapsed_time": "0:19:20", "remaining_time": "0:45:26"}
+{"current_steps": 770, "total_steps": 2545, "loss": 0.3143, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 7.97654432138333e-06, "epoch": 1.51, "percentage": 30.26, "elapsed_time": "0:19:34", "remaining_time": "0:45:06"}
+{"current_steps": 780, "total_steps": 2545, "loss": 0.3853, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 7.926329877152665e-06, "epoch": 1.53, "percentage": 30.65, "elapsed_time": "0:19:46", "remaining_time": "0:44:43"}
+{"current_steps": 790, "total_steps": 2545, "loss": 0.3196, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 7.875662436617211e-06, "epoch": 1.55, "percentage": 31.04, "elapsed_time": "0:20:01", "remaining_time": "0:44:28"}
+{"current_steps": 800, "total_steps": 2545, "loss": 0.3048, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 7.824549843104378e-06, "epoch": 1.57, "percentage": 31.43, "elapsed_time": "0:20:12", "remaining_time": "0:44:05"}
+{"current_steps": 800, "total_steps": 2545, "loss": null, "eval_loss": 0.369180291891098, "predict_loss": null, "reward": null, "learning_rate": null, "epoch": 1.57, "percentage": 31.43, "elapsed_time": "0:20:12", "remaining_time": "0:44:05"}
+{"current_steps": 810, "total_steps": 2545, "loss": 0.3529, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 7.773000008851323e-06, "epoch": 1.59, "percentage": 31.83, "elapsed_time": "0:21:08", "remaining_time": "0:45:16"}
+{"current_steps": 820, "total_steps": 2545, "loss": 0.4047, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 7.721020913780137e-06, "epoch": 1.61, "percentage": 32.22, "elapsed_time": "0:21:20", "remaining_time": "0:44:52"}
+{"current_steps": 830, "total_steps": 2545, "loss": 0.3268, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 7.668620604262548e-06, "epoch": 1.63, "percentage": 32.61, "elapsed_time": "0:21:32", "remaining_time": "0:44:31"}
+{"current_steps": 840, "total_steps": 2545, "loss": 0.3198, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 7.615807191874349e-06, "epoch": 1.65, "percentage": 33.01, "elapsed_time": "0:21:45", "remaining_time": "0:44:09"}
+{"current_steps": 850, "total_steps": 2545, "loss": 0.3856, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 7.56258885213971e-06, "epoch": 1.67, "percentage": 33.4, "elapsed_time": "0:21:58", "remaining_time": "0:43:48"}
+{"current_steps": 860, "total_steps": 2545, "loss": 0.2915, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 7.50897382326562e-06, "epoch": 1.69, "percentage": 33.79, "elapsed_time": "0:22:11", "remaining_time": "0:43:28"}
+{"current_steps": 870, "total_steps": 2545, "loss": 0.2955, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 7.454970404866612e-06, "epoch": 1.71, "percentage": 34.18, "elapsed_time": "0:22:27", "remaining_time": "0:43:14"}
+{"current_steps": 880, "total_steps": 2545, "loss": 0.3256, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 7.400586956679965e-06, "epoch": 1.73, "percentage": 34.58, "elapsed_time": "0:22:41", "remaining_time": "0:42:56"}
+{"current_steps": 890, "total_steps": 2545, "loss": 0.3329, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 7.345831897271636e-06, "epoch": 1.75, "percentage": 34.97, "elapsed_time": "0:22:55", "remaining_time": "0:42:37"}
+{"current_steps": 900, "total_steps": 2545, "loss": 0.4319, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 7.2907137027330455e-06, "epoch": 1.77, "percentage": 35.36, "elapsed_time": "0:23:07", "remaining_time": "0:42:16"}
+{"current_steps": 910, "total_steps": 2545, "loss": 0.3695, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 7.235240905368997e-06, "epoch": 1.79, "percentage": 35.76, "elapsed_time": "0:23:23", "remaining_time": "0:42:00"}
+{"current_steps": 920, "total_steps": 2545, "loss": 0.2896, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 7.179422092376856e-06, "epoch": 1.81, "percentage": 36.15, "elapsed_time": "0:23:33", "remaining_time": "0:41:37"}
+{"current_steps": 930, "total_steps": 2545, "loss": 0.3428, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 7.123265904517264e-06, "epoch": 1.83, "percentage": 36.54, "elapsed_time": "0:23:45", "remaining_time": "0:41:15"}
+{"current_steps": 940, "total_steps": 2545, "loss": 0.3375, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 7.066781034776546e-06, "epoch": 1.85, "percentage": 36.94, "elapsed_time": "0:23:58", "remaining_time": "0:40:57"}
+{"current_steps": 950, "total_steps": 2545, "loss": 0.3072, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 7.009976227021018e-06, "epoch": 1.87, "percentage": 37.33, "elapsed_time": "0:24:12", "remaining_time": "0:40:38"}
+{"current_steps": 960, "total_steps": 2545, "loss": 0.3428, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 6.952860274643466e-06, "epoch": 1.89, "percentage": 37.72, "elapsed_time": "0:24:26", "remaining_time": "0:40:20"}
+{"current_steps": 970, "total_steps": 2545, "loss": 0.374, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 6.895442019201898e-06, "epoch": 1.91, "percentage": 38.11, "elapsed_time": "0:24:41", "remaining_time": "0:40:05"}
+{"current_steps": 980, "total_steps": 2545, "loss": 0.3584, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 6.837730349050886e-06, "epoch": 1.93, "percentage": 38.51, "elapsed_time": "0:24:54", "remaining_time": "0:39:46"}
+{"current_steps": 990, "total_steps": 2545, "loss": 0.2854, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 6.7797341979656454e-06, "epoch": 1.94, "percentage": 38.9, "elapsed_time": "0:25:08", "remaining_time": "0:39:28"}
+{"current_steps": 1000, "total_steps": 2545, "loss": 0.4185, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 6.721462543759077e-06, "epoch": 1.96, "percentage": 39.29, "elapsed_time": "0:25:23", "remaining_time": "0:39:13"}
+{"current_steps": 1000, "total_steps": 2545, "loss": null, "eval_loss": 0.35499048233032227, "predict_loss": null, "reward": null, "learning_rate": null, "epoch": 1.96, "percentage": 39.29, "elapsed_time": "0:25:23", "remaining_time": "0:39:13"}
+{"current_steps": 1010, "total_steps": 2545, "loss": 0.3806, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 6.6629244068920155e-06, "epoch": 1.98, "percentage": 39.69, "elapsed_time": "0:26:15", "remaining_time": "0:39:53"}
+{"current_steps": 1020, "total_steps": 2545, "loss": 0.2947, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 6.6041288490768385e-06, "epoch": 2.0, "percentage": 40.08, "elapsed_time": "0:26:31", "remaining_time": "0:39:39"}
+{"current_steps": 1030, "total_steps": 2545, "loss": 0.2732, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 6.545084971874738e-06, "epoch": 2.02, "percentage": 40.47, "elapsed_time": "0:26:43", "remaining_time": "0:39:18"}
+{"current_steps": 1040, "total_steps": 2545, "loss": 0.2952, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 6.485801915286776e-06, "epoch": 2.04, "percentage": 40.86, "elapsed_time": "0:26:59", "remaining_time": "0:39:03"}
+{"current_steps": 1050, "total_steps": 2545, "loss": 0.2251, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 6.426288856339022e-06, "epoch": 2.06, "percentage": 41.26, "elapsed_time": "0:27:13", "remaining_time": "0:38:46"}
+{"current_steps": 1060, "total_steps": 2545, "loss": 0.2822, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 6.366555007661949e-06, "epoch": 2.08, "percentage": 41.65, "elapsed_time": "0:27:28", "remaining_time": "0:38:29"}
+{"current_steps": 1070, "total_steps": 2545, "loss": 0.278, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 6.306609616064304e-06, "epoch": 2.1, "percentage": 42.04, "elapsed_time": "0:27:41", "remaining_time": "0:38:10"}
+{"current_steps": 1080, "total_steps": 2545, "loss": 0.2213, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 6.246461961101721e-06, "epoch": 2.12, "percentage": 42.44, "elapsed_time": "0:27:54", "remaining_time": "0:37:50"}
+{"current_steps": 1090, "total_steps": 2545, "loss": 0.3001, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 6.186121353640223e-06, "epoch": 2.14, "percentage": 42.83, "elapsed_time": "0:28:09", "remaining_time": "0:37:35"}
+{"current_steps": 1100, "total_steps": 2545, "loss": 0.3357, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 6.125597134414919e-06, "epoch": 2.16, "percentage": 43.22, "elapsed_time": "0:28:23", "remaining_time": "0:37:17"}
+{"current_steps": 1110, "total_steps": 2545, "loss": 0.3637, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 6.064898672584036e-06, "epoch": 2.18, "percentage": 43.61, "elapsed_time": "0:28:36", "remaining_time": "0:36:58"}
+{"current_steps": 1120, "total_steps": 2545, "loss": 0.4342, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 6.004035364278593e-06, "epoch": 2.2, "percentage": 44.01, "elapsed_time": "0:28:49", "remaining_time": "0:36:40"}
+{"current_steps": 1130, "total_steps": 2545, "loss": 0.3534, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 5.94301663114786e-06, "epoch": 2.22, "percentage": 44.4, "elapsed_time": "0:29:02", "remaining_time": "0:36:21"}
+{"current_steps": 1140, "total_steps": 2545, "loss": 0.271, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 5.881851918900897e-06, "epoch": 2.24, "percentage": 44.79, "elapsed_time": "0:29:14", "remaining_time": "0:36:02"}
+{"current_steps": 1150, "total_steps": 2545, "loss": 0.3774, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 5.820550695844354e-06, "epoch": 2.26, "percentage": 45.19, "elapsed_time": "0:29:26", "remaining_time": "0:35:43"}
+{"current_steps": 1160, "total_steps": 2545, "loss": 0.2967, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 5.759122451416769e-06, "epoch": 2.28, "percentage": 45.58, "elapsed_time": "0:29:39", "remaining_time": "0:35:24"}
+{"current_steps": 1170, "total_steps": 2545, "loss": 0.2556, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 5.697576694719616e-06, "epoch": 2.3, "percentage": 45.97, "elapsed_time": "0:29:57", "remaining_time": "0:35:12"}
+{"current_steps": 1180, "total_steps": 2545, "loss": 0.3435, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 5.635922953045278e-06, "epoch": 2.32, "percentage": 46.37, "elapsed_time": "0:30:10", "remaining_time": "0:34:54"}
+{"current_steps": 1190, "total_steps": 2545, "loss": 0.3064, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 5.574170770402236e-06, "epoch": 2.34, "percentage": 46.76, "elapsed_time": "0:30:23", "remaining_time": "0:34:36"}
+{"current_steps": 1200, "total_steps": 2545, "loss": 0.3737, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 5.512329706037643e-06, "epoch": 2.36, "percentage": 47.15, "elapsed_time": "0:30:35", "remaining_time": "0:34:16"}
+{"current_steps": 1200, "total_steps": 2545, "loss": null, "eval_loss": 0.34871676564216614, "predict_loss": null, "reward": null, "learning_rate": null, "epoch": 2.36, "percentage": 47.15, "elapsed_time": "0:30:35", "remaining_time": "0:34:16"}
+{"current_steps": 1210, "total_steps": 2545, "loss": 0.3051, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 5.4504093329575546e-06, "epoch": 2.38, "percentage": 47.54, "elapsed_time": "0:31:27", "remaining_time": "0:34:42"}
+{"current_steps": 1220, "total_steps": 2545, "loss": 0.3564, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 5.388419236445033e-06, "epoch": 2.4, "percentage": 47.94, "elapsed_time": "0:31:40", "remaining_time": "0:34:23"}
+{"current_steps": 1230, "total_steps": 2545, "loss": 0.3243, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 5.3263690125763316e-06, "epoch": 2.42, "percentage": 48.33, "elapsed_time": "0:31:55", "remaining_time": "0:34:07"}
+{"current_steps": 1240, "total_steps": 2545, "loss": 0.2923, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 5.264268266735432e-06, "epoch": 2.44, "percentage": 48.72, "elapsed_time": "0:32:08", "remaining_time": "0:33:49"}
+{"current_steps": 1250, "total_steps": 2545, "loss": 0.3074, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 5.202126612127125e-06, "epoch": 2.46, "percentage": 49.12, "elapsed_time": "0:32:22", "remaining_time": "0:33:32"}
+{"current_steps": 1260, "total_steps": 2545, "loss": 0.3111, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 5.139953668288883e-06, "epoch": 2.48, "percentage": 49.51, "elapsed_time": "0:32:35", "remaining_time": "0:33:14"}
+{"current_steps": 1270, "total_steps": 2545, "loss": 0.3539, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 5.077759059601756e-06, "epoch": 2.5, "percentage": 49.9, "elapsed_time": "0:32:48", "remaining_time": "0:32:56"}
+{"current_steps": 1280, "total_steps": 2545, "loss": 0.2223, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 5.015552413800514e-06, "epoch": 2.51, "percentage": 50.29, "elapsed_time": "0:33:03", "remaining_time": "0:32:40"}
+{"current_steps": 1290, "total_steps": 2545, "loss": 0.2714, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.953343360483281e-06, "epoch": 2.53, "percentage": 50.69, "elapsed_time": "0:33:16", "remaining_time": "0:32:22"}
+{"current_steps": 1300, "total_steps": 2545, "loss": 0.2568, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.8911415296208555e-06, "epoch": 2.55, "percentage": 51.08, "elapsed_time": "0:33:33", "remaining_time": "0:32:08"}
+{"current_steps": 1310, "total_steps": 2545, "loss": 0.2363, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.828956550066006e-06, "epoch": 2.57, "percentage": 51.47, "elapsed_time": "0:33:48", "remaining_time": "0:31:52"}
+{"current_steps": 1320, "total_steps": 2545, "loss": 0.4116, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.766798048062913e-06, "epoch": 2.59, "percentage": 51.87, "elapsed_time": "0:34:02", "remaining_time": "0:31:35"}
+{"current_steps": 1330, "total_steps": 2545, "loss": 0.3128, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.704675645757028e-06, "epoch": 2.61, "percentage": 52.26, "elapsed_time": "0:34:18", "remaining_time": "0:31:20"}
+{"current_steps": 1340, "total_steps": 2545, "loss": 0.2368, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.642598959705565e-06, "epoch": 2.63, "percentage": 52.65, "elapsed_time": "0:34:31", "remaining_time": "0:31:02"}
+{"current_steps": 1350, "total_steps": 2545, "loss": 0.3453, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.580577599388854e-06, "epoch": 2.65, "percentage": 53.05, "elapsed_time": "0:34:44", "remaining_time": "0:30:45"}
+{"current_steps": 1360, "total_steps": 2545, "loss": 0.2893, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.518621165722794e-06, "epoch": 2.67, "percentage": 53.44, "elapsed_time": "0:34:57", "remaining_time": "0:30:27"}
+{"current_steps": 1370, "total_steps": 2545, "loss": 0.3422, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.456739249572628e-06, "epoch": 2.69, "percentage": 53.83, "elapsed_time": "0:35:12", "remaining_time": "0:30:11"}
+{"current_steps": 1380, "total_steps": 2545, "loss": 0.3076, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.394941430268275e-06, "epoch": 2.71, "percentage": 54.22, "elapsed_time": "0:35:24", "remaining_time": "0:29:53"}
+{"current_steps": 1390, "total_steps": 2545, "loss": 0.3504, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.333237274121453e-06, "epoch": 2.73, "percentage": 54.62, "elapsed_time": "0:35:38", "remaining_time": "0:29:36"}
+{"current_steps": 1400, "total_steps": 2545, "loss": 0.2418, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.271636332944806e-06, "epoch": 2.75, "percentage": 55.01, "elapsed_time": "0:35:51", "remaining_time": "0:29:19"}
+{"current_steps": 1400, "total_steps": 2545, "loss": null, "eval_loss": 0.3422459363937378, "predict_loss": null, "reward": null, "learning_rate": null, "epoch": 2.75, "percentage": 55.01, "elapsed_time": "0:35:51", "remaining_time": "0:29:19"}
+{"current_steps": 1410, "total_steps": 2545, "loss": 0.3114, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.210148142573287e-06, "epoch": 2.77, "percentage": 55.4, "elapsed_time": "0:36:45", "remaining_time": "0:29:35"}
+{"current_steps": 1420, "total_steps": 2545, "loss": 0.3683, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.148782221388007e-06, "epoch": 2.79, "percentage": 55.8, "elapsed_time": "0:37:00", "remaining_time": "0:29:18"}
+{"current_steps": 1430, "total_steps": 2545, "loss": 0.3036, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.087548068842787e-06, "epoch": 2.81, "percentage": 56.19, "elapsed_time": "0:37:13", "remaining_time": "0:29:01"}
+{"current_steps": 1440, "total_steps": 2545, "loss": 0.3015, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.026455163993645e-06, "epoch": 2.83, "percentage": 56.58, "elapsed_time": "0:37:26", "remaining_time": "0:28:44"}
+{"current_steps": 1450, "total_steps": 2545, "loss": 0.2794, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.965512964031435e-06, "epoch": 2.85, "percentage": 56.97, "elapsed_time": "0:37:40", "remaining_time": "0:28:27"}
+{"current_steps": 1460, "total_steps": 2545, "loss": 0.2911, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.904730902817867e-06, "epoch": 2.87, "percentage": 57.37, "elapsed_time": "0:37:52", "remaining_time": "0:28:08"}
+{"current_steps": 1470, "total_steps": 2545, "loss": 0.2511, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.844118389425154e-06, "epoch": 2.89, "percentage": 57.76, "elapsed_time": "0:38:07", "remaining_time": "0:27:52"}
+{"current_steps": 1480, "total_steps": 2545, "loss": 0.3791, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.7836848066794763e-06, "epoch": 2.91, "percentage": 58.15, "elapsed_time": "0:38:21", "remaining_time": "0:27:36"}
+{"current_steps": 1490, "total_steps": 2545, "loss": 0.3396, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.7234395097085298e-06, "epoch": 2.93, "percentage": 58.55, "elapsed_time": "0:38:33", "remaining_time": "0:27:18"}
+{"current_steps": 1500, "total_steps": 2545, "loss": 0.1901, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.663391824493336e-06, "epoch": 2.95, "percentage": 58.94, "elapsed_time": "0:38:47", "remaining_time": "0:27:01"}
+{"current_steps": 1510, "total_steps": 2545, "loss": 0.3611, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.6035510464245937e-06, "epoch": 2.97, "percentage": 59.33, "elapsed_time": "0:39:02", "remaining_time": "0:26:45"}
+{"current_steps": 1520, "total_steps": 2545, "loss": 0.2753, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.5439264388637407e-06, "epoch": 2.99, "percentage": 59.72, "elapsed_time": "0:39:16", "remaining_time": "0:26:29"}
+{"current_steps": 1530, "total_steps": 2545, "loss": 0.2861, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.4845272317089882e-06, "epoch": 3.01, "percentage": 60.12, "elapsed_time": "0:39:29", "remaining_time": "0:26:11"}
+{"current_steps": 1540, "total_steps": 2545, "loss": 0.2989, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.4253626199665314e-06, "epoch": 3.03, "percentage": 60.51, "elapsed_time": "0:39:42", "remaining_time": "0:25:54"}
+{"current_steps": 1550, "total_steps": 2545, "loss": 0.3478, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.3664417623271594e-06, "epoch": 3.05, "percentage": 60.9, "elapsed_time": "0:39:56", "remaining_time": "0:25:38"}
+{"current_steps": 1560, "total_steps": 2545, "loss": 0.2654, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.3077737797484923e-06, "epoch": 3.06, "percentage": 61.3, "elapsed_time": "0:40:10", "remaining_time": "0:25:21"}
+{"current_steps": 1570, "total_steps": 2545, "loss": 0.2063, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.249367754043047e-06, "epoch": 3.08, "percentage": 61.69, "elapsed_time": "0:40:25", "remaining_time": "0:25:06"}
+{"current_steps": 1580, "total_steps": 2545, "loss": 0.3329, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.1912327264723843e-06, "epoch": 3.1, "percentage": 62.08, "elapsed_time": "0:40:39", "remaining_time": "0:24:49"}
+{"current_steps": 1590, "total_steps": 2545, "loss": 0.3934, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.133377696347506e-06, "epoch": 3.12, "percentage": 62.48, "elapsed_time": "0:40:53", "remaining_time": "0:24:33"}
+{"current_steps": 1600, "total_steps": 2545, "loss": 0.2528, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.075811619635774e-06, "epoch": 3.14, "percentage": 62.87, "elapsed_time": "0:41:07", "remaining_time": "0:24:17"}
+{"current_steps": 1600, "total_steps": 2545, "loss": null, "eval_loss": 0.33900028467178345, "predict_loss": null, "reward": null, "learning_rate": null, "epoch": 3.14, "percentage": 62.87, "elapsed_time": "0:41:07", "remaining_time": "0:24:17"}
+{"current_steps": 1610, "total_steps": 2545, "loss": 0.247, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.0185434075745124e-06, "epoch": 3.16, "percentage": 63.26, "elapsed_time": "0:41:59", "remaining_time": "0:24:23"}
+{"current_steps": 1620, "total_steps": 2545, "loss": 0.313, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.961581925291557e-06, "epoch": 3.18, "percentage": 63.65, "elapsed_time": "0:42:12", "remaining_time": "0:24:06"}
+{"current_steps": 1630, "total_steps": 2545, "loss": 0.3331, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.9049359904329234e-06, "epoch": 3.2, "percentage": 64.05, "elapsed_time": "0:42:27", "remaining_time": "0:23:49"}
+{"current_steps": 1640, "total_steps": 2545, "loss": 0.2684, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.8486143717978448e-06, "epoch": 3.22, "percentage": 64.44, "elapsed_time": "0:42:40", "remaining_time": "0:23:32"}
+{"current_steps": 1650, "total_steps": 2545, "loss": 0.2743, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.792625787981349e-06, "epoch": 3.24, "percentage": 64.83, "elapsed_time": "0:42:52", "remaining_time": "0:23:15"}
+{"current_steps": 1660, "total_steps": 2545, "loss": 0.3512, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.736978906024626e-06, "epoch": 3.26, "percentage": 65.23, "elapsed_time": "0:43:08", "remaining_time": "0:22:59"}
+{"current_steps": 1670, "total_steps": 2545, "loss": 0.2919, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.6816823400733628e-06, "epoch": 3.28, "percentage": 65.62, "elapsed_time": "0:43:22", "remaining_time": "0:22:43"}
+{"current_steps": 1680, "total_steps": 2545, "loss": 0.322, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.626744650044284e-06, "epoch": 3.3, "percentage": 66.01, "elapsed_time": "0:43:34", "remaining_time": "0:22:26"}
+{"current_steps": 1690, "total_steps": 2545, "loss": 0.2349, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.572174340300061e-06, "epoch": 3.32, "percentage": 66.4, "elapsed_time": "0:43:52", "remaining_time": "0:22:11"}
+{"current_steps": 1700, "total_steps": 2545, "loss": 0.2368, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.5179798583328415e-06, "epoch": 3.34, "percentage": 66.8, "elapsed_time": "0:44:05", "remaining_time": "0:21:54"}
+{"current_steps": 1710, "total_steps": 2545, "loss": 0.3711, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.4641695934565827e-06, "epoch": 3.36, "percentage": 67.19, "elapsed_time": "0:44:19", "remaining_time": "0:21:38"}
+{"current_steps": 1720, "total_steps": 2545, "loss": 0.2323, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.410751875508373e-06, "epoch": 3.38, "percentage": 67.58, "elapsed_time": "0:44:34", "remaining_time": "0:21:22"}
+{"current_steps": 1730, "total_steps": 2545, "loss": 0.3185, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.3577349735589787e-06, "epoch": 3.4, "percentage": 67.98, "elapsed_time": "0:44:49", "remaining_time": "0:21:07"}
+{"current_steps": 1740, "total_steps": 2545, "loss": 0.2479, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.3051270946327887e-06, "epoch": 3.42, "percentage": 68.37, "elapsed_time": "0:45:05", "remaining_time": "0:20:51"}
+{"current_steps": 1750, "total_steps": 2545, "loss": 0.3738, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.2529363824373707e-06, "epoch": 3.44, "percentage": 68.76, "elapsed_time": "0:45:18", "remaining_time": "0:20:35"}
+{"current_steps": 1760, "total_steps": 2545, "loss": 0.3481, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.2011709161028156e-06, "epoch": 3.46, "percentage": 69.16, "elapsed_time": "0:45:30", "remaining_time": "0:20:17"}
+{"current_steps": 1770, "total_steps": 2545, "loss": 0.2863, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.149838708931087e-06, "epoch": 3.48, "percentage": 69.55, "elapsed_time": "0:45:43", "remaining_time": "0:20:01"}
+{"current_steps": 1780, "total_steps": 2545, "loss": 0.2415, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.0989477071555757e-06, "epoch": 3.5, "percentage": 69.94, "elapsed_time": "0:45:58", "remaining_time": "0:19:45"}
+{"current_steps": 1790, "total_steps": 2545, "loss": 0.2989, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.0485057887110026e-06, "epoch": 3.52, "percentage": 70.33, "elapsed_time": "0:46:10", "remaining_time": "0:19:28"}
+{"current_steps": 1800, "total_steps": 2545, "loss": 0.3192, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.998520762013923e-06, "epoch": 3.54, "percentage": 70.73, "elapsed_time": "0:46:24", "remaining_time": "0:19:12"}
+{"current_steps": 1800, "total_steps": 2545, "loss": null, "eval_loss": 0.33926254510879517, "predict_loss": null, "reward": null, "learning_rate": null, "epoch": 3.54, "percentage": 70.73, "elapsed_time": "0:46:24", "remaining_time": "0:19:12"}
+{"current_steps": 1810, "total_steps": 2545, "loss": 0.2484, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.949000364753985e-06, "epoch": 3.56, "percentage": 71.12, "elapsed_time": "0:47:16", "remaining_time": "0:19:11"}
+{"current_steps": 1820, "total_steps": 2545, "loss": 0.2761, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.8999522626961254e-06, "epoch": 3.58, "percentage": 71.51, "elapsed_time": "0:47:31", "remaining_time": "0:18:55"}
+{"current_steps": 1830, "total_steps": 2545, "loss": 0.2972, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.851384048493914e-06, "epoch": 3.6, "percentage": 71.91, "elapsed_time": "0:47:43", "remaining_time": "0:18:38"}
+{"current_steps": 1840, "total_steps": 2545, "loss": 0.2279, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.8033032405142075e-06, "epoch": 3.61, "percentage": 72.3, "elapsed_time": "0:47:55", "remaining_time": "0:18:21"}
+{"current_steps": 1850, "total_steps": 2545, "loss": 0.2662, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.7557172816733104e-06, "epoch": 3.63, "percentage": 72.69, "elapsed_time": "0:48:08", "remaining_time": "0:18:05"}
+{"current_steps": 1860, "total_steps": 2545, "loss": 0.3054, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.7086335382848012e-06, "epoch": 3.65, "percentage": 73.08, "elapsed_time": "0:48:21", "remaining_time": "0:17:48"}
+{"current_steps": 1870, "total_steps": 2545, "loss": 0.3051, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.6620592989192318e-06, "epoch": 3.67, "percentage": 73.48, "elapsed_time": "0:48:34", "remaining_time": "0:17:31"}
+{"current_steps": 1880, "total_steps": 2545, "loss": 0.2765, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.6160017732758588e-06, "epoch": 3.69, "percentage": 73.87, "elapsed_time": "0:48:49", "remaining_time": "0:17:16"}
+{"current_steps": 1890, "total_steps": 2545, "loss": 0.2839, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.5704680910665721e-06, "epoch": 3.71, "percentage": 74.26, "elapsed_time": "0:49:04", "remaining_time": "0:17:00"}
+{"current_steps": 1900, "total_steps": 2545, "loss": 0.2527, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.5254653009122206e-06, "epoch": 3.73, "percentage": 74.66, "elapsed_time": "0:49:18", "remaining_time": "0:16:44"}
+{"current_steps": 1910, "total_steps": 2545, "loss": 0.2268, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.4810003692514829e-06, "epoch": 3.75, "percentage": 75.05, "elapsed_time": "0:49:31", "remaining_time": "0:16:27"}
+{"current_steps": 1920, "total_steps": 2545, "loss": 0.3011, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.4370801792624656e-06, "epoch": 3.77, "percentage": 75.44, "elapsed_time": "0:49:44", "remaining_time": "0:16:11"}
+{"current_steps": 1930, "total_steps": 2545, "loss": 0.2938, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.3937115297971788e-06, "epoch": 3.79, "percentage": 75.83, "elapsed_time": "0:49:57", "remaining_time": "0:15:55"}
+{"current_steps": 1940, "total_steps": 2545, "loss": 0.2375, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.3509011343290767e-06, "epoch": 3.81, "percentage": 76.23, "elapsed_time": "0:50:12", "remaining_time": "0:15:39"}
+{"current_steps": 1950, "total_steps": 2545, "loss": 0.2748, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.3086556199138117e-06, "epoch": 3.83, "percentage": 76.62, "elapsed_time": "0:50:27", "remaining_time": "0:15:23"}
+{"current_steps": 1960, "total_steps": 2545, "loss": 0.2703, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.2669815261633666e-06, "epoch": 3.85, "percentage": 77.01, "elapsed_time": "0:50:42", "remaining_time": "0:15:07"}
+{"current_steps": 1970, "total_steps": 2545, "loss": 0.2331, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.225885304233716e-06, "epoch": 3.87, "percentage": 77.41, "elapsed_time": "0:50:55", "remaining_time": "0:14:51"}
+{"current_steps": 1980, "total_steps": 2545, "loss": 0.3299, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.1853733158261894e-06, "epoch": 3.89, "percentage": 77.8, "elapsed_time": "0:51:09", "remaining_time": "0:14:35"}
+{"current_steps": 1990, "total_steps": 2545, "loss": 0.2159, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.1454518322026836e-06, "epoch": 3.91, "percentage": 78.19, "elapsed_time": "0:51:21", "remaining_time": "0:14:19"}
+{"current_steps": 2000, "total_steps": 2545, "loss": 0.2834, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.1061270332148626e-06, "epoch": 3.93, "percentage": 78.59, "elapsed_time": "0:51:35", "remaining_time": "0:14:03"}
+{"current_steps": 2000, "total_steps": 2545, "loss": null, "eval_loss": 0.3370112180709839, "predict_loss": null, "reward": null, "learning_rate": null, "epoch": 3.93, "percentage": 78.59, "elapsed_time": "0:51:35", "remaining_time": "0:14:03"}
+{"current_steps": 2010, "total_steps": 2545, "loss": 0.2315, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.06740500634752e-06, "epoch": 3.95, "percentage": 78.98, "elapsed_time": "0:52:29", "remaining_time": "0:13:58"}
+{"current_steps": 2020, "total_steps": 2545, "loss": 0.32, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.0292917457762325e-06, "epoch": 3.97, "percentage": 79.37, "elapsed_time": "0:52:41", "remaining_time": "0:13:41"}
+{"current_steps": 2030, "total_steps": 2545, "loss": 0.3086, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.917931514394674e-07, "epoch": 3.99, "percentage": 79.76, "elapsed_time": "0:52:55", "remaining_time": "0:13:25"}
+{"current_steps": 2040, "total_steps": 2545, "loss": 0.327, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.549150281252633e-07, "epoch": 4.01, "percentage": 80.16, "elapsed_time": "0:53:11", "remaining_time": "0:13:09"}
+{"current_steps": 2050, "total_steps": 2545, "loss": 0.2789, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.186630845726497e-07, "epoch": 4.03, "percentage": 80.55, "elapsed_time": "0:53:24", "remaining_time": "0:12:53"}
+{"current_steps": 2060, "total_steps": 2545, "loss": 0.305, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 8.830429325879397e-07, "epoch": 4.05, "percentage": 80.94, "elapsed_time": "0:53:39", "remaining_time": "0:12:37"}
+{"current_steps": 2070, "total_steps": 2545, "loss": 0.2732, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 8.480600861760124e-07, "epoch": 4.07, "percentage": 81.34, "elapsed_time": "0:53:51", "remaining_time": "0:12:21"}
+{"current_steps": 2080, "total_steps": 2545, "loss": 0.2897, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 8.137199606867507e-07, "epoch": 4.09, "percentage": 81.73, "elapsed_time": "0:54:04", "remaining_time": "0:12:05"}
+{"current_steps": 2090, "total_steps": 2545, "loss": 0.3181, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 7.800278719767407e-07, "epoch": 4.11, "percentage": 82.12, "elapsed_time": "0:54:16", "remaining_time": "0:11:48"}
+{"current_steps": 2100, "total_steps": 2545, "loss": 0.2144, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 7.469890355863785e-07, "epoch": 4.13, "percentage": 82.51, "elapsed_time": "0:54:28", "remaining_time": "0:11:32"}
+{"current_steps": 2110, "total_steps": 2545, "loss": 0.294, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 7.146085659325025e-07, "epoch": 4.15, "percentage": 82.91, "elapsed_time": "0:54:44", "remaining_time": "0:11:17"}
+{"current_steps": 2120, "total_steps": 2545, "loss": 0.2729, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 6.828914755166826e-07, "epoch": 4.17, "percentage": 83.3, "elapsed_time": "0:54:58", "remaining_time": "0:11:01"}
+{"current_steps": 2130, "total_steps": 2545, "loss": 0.2509, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 6.518426741492823e-07, "epoch": 4.18, "percentage": 83.69, "elapsed_time": "0:55:13", "remaining_time": "0:10:45"}
+{"current_steps": 2140, "total_steps": 2545, "loss": 0.2271, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 6.214669681894181e-07, "epoch": 4.2, "percentage": 84.09, "elapsed_time": "0:55:27", "remaining_time": "0:10:29"}
+{"current_steps": 2150, "total_steps": 2545, "loss": 0.2703, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 5.917690598009385e-07, "epoch": 4.22, "percentage": 84.48, "elapsed_time": "0:55:40", "remaining_time": "0:10:13"}
+{"current_steps": 2160, "total_steps": 2545, "loss": 0.2339, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 5.62753546224526e-07, "epoch": 4.24, "percentage": 84.87, "elapsed_time": "0:55:54", "remaining_time": "0:09:57"}
+{"current_steps": 2170, "total_steps": 2545, "loss": 0.3029, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 5.344249190660427e-07, "epoch": 4.26, "percentage": 85.27, "elapsed_time": "0:56:09", "remaining_time": "0:09:42"}
+{"current_steps": 2180, "total_steps": 2545, "loss": 0.3205, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 5.067875636012304e-07, "epoch": 4.28, "percentage": 85.66, "elapsed_time": "0:56:24", "remaining_time": "0:09:26"}
+{"current_steps": 2190, "total_steps": 2545, "loss": 0.2875, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.798457580968674e-07, "epoch": 4.3, "percentage": 86.05, "elapsed_time": "0:56:37", "remaining_time": "0:09:10"}
+{"current_steps": 2200, "total_steps": 2545, "loss": 0.3612, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.536036731484972e-07, "epoch": 4.32, "percentage": 86.44, "elapsed_time": "0:56:55", "remaining_time": "0:08:55"}
+{"current_steps": 2200, "total_steps": 2545, "loss": null, "eval_loss": 0.337531715631485, "predict_loss": null, "reward": null, "learning_rate": null, "epoch": 4.32, "percentage": 86.44, "elapsed_time": "0:56:55", "remaining_time": "0:08:55"}
+{"current_steps": 2210, "total_steps": 2545, "loss": 0.2825, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.280653710348126e-07, "epoch": 4.34, "percentage": 86.84, "elapsed_time": "0:57:46", "remaining_time": "0:08:45"}
+{"current_steps": 2220, "total_steps": 2545, "loss": 0.323, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.03234805088818e-07, "epoch": 4.36, "percentage": 87.23, "elapsed_time": "0:58:01", "remaining_time": "0:08:29"}
+{"current_steps": 2230, "total_steps": 2545, "loss": 0.2784, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.7911581908585626e-07, "epoch": 4.38, "percentage": 87.62, "elapsed_time": "0:58:12", "remaining_time": "0:08:13"}
+{"current_steps": 2240, "total_steps": 2545, "loss": 0.2632, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.5571214664858356e-07, "epoch": 4.4, "percentage": 88.02, "elapsed_time": "0:58:25", "remaining_time": "0:07:57"}
+{"current_steps": 2250, "total_steps": 2545, "loss": 0.2192, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.330274106690112e-07, "epoch": 4.42, "percentage": 88.41, "elapsed_time": "0:58:36", "remaining_time": "0:07:41"}
+{"current_steps": 2260, "total_steps": 2545, "loss": 0.2647, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.110651227476774e-07, "epoch": 4.44, "percentage": 88.8, "elapsed_time": "0:58:50", "remaining_time": "0:07:25"}
+{"current_steps": 2270, "total_steps": 2545, "loss": 0.2897, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.8982868265005457e-07, "epoch": 4.46, "percentage": 89.19, "elapsed_time": "0:59:04", "remaining_time": "0:07:09"}
+{"current_steps": 2280, "total_steps": 2545, "loss": 0.3023, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.693213777802606e-07, "epoch": 4.48, "percentage": 89.59, "elapsed_time": "0:59:18", "remaining_time": "0:06:53"}
+{"current_steps": 2290, "total_steps": 2545, "loss": 0.2419, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.495463826721711e-07, "epoch": 4.5, "percentage": 89.98, "elapsed_time": "0:59:33", "remaining_time": "0:06:37"}
+{"current_steps": 2300, "total_steps": 2545, "loss": 0.3104, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.305067584980014e-07, "epoch": 4.52, "percentage": 90.37, "elapsed_time": "0:59:48", "remaining_time": "0:06:22"}
+{"current_steps": 2310, "total_steps": 2545, "loss": 0.328, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.1220545259443558e-07, "epoch": 4.54, "percentage": 90.77, "elapsed_time": "1:00:01", "remaining_time": "0:06:06"}
+{"current_steps": 2320, "total_steps": 2545, "loss": 0.307, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.9464529800637731e-07, "epoch": 4.56, "percentage": 91.16, "elapsed_time": "1:00:15", "remaining_time": "0:05:50"}
+{"current_steps": 2330, "total_steps": 2545, "loss": 0.2437, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.7782901304839617e-07, "epoch": 4.58, "percentage": 91.55, "elapsed_time": "1:00:26", "remaining_time": "0:05:34"}
+{"current_steps": 2340, "total_steps": 2545, "loss": 0.2862, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.617592008839308e-07, "epoch": 4.6, "percentage": 91.94, "elapsed_time": "1:00:38", "remaining_time": "0:05:18"}
+{"current_steps": 2350, "total_steps": 2545, "loss": 0.2901, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.4643834912232035e-07, "epoch": 4.62, "percentage": 92.34, "elapsed_time": "1:00:51", "remaining_time": "0:05:02"}
+{"current_steps": 2360, "total_steps": 2545, "loss": 0.2482, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.3186882943371892e-07, "epoch": 4.64, "percentage": 92.73, "elapsed_time": "1:01:05", "remaining_time": "0:04:47"}
+{"current_steps": 2370, "total_steps": 2545, "loss": 0.2957, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.1805289718196499e-07, "epoch": 4.66, "percentage": 93.12, "elapsed_time": "1:01:17", "remaining_time": "0:04:31"}
+{"current_steps": 2380, "total_steps": 2545, "loss": 0.3179, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.0499269107544674e-07, "epoch": 4.68, "percentage": 93.52, "elapsed_time": "1:01:30", "remaining_time": "0:04:15"}
+{"current_steps": 2390, "total_steps": 2545, "loss": 0.277, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.269023283603295e-08, "epoch": 4.7, "percentage": 93.91, "elapsed_time": "1:01:43", "remaining_time": "0:04:00"}
+{"current_steps": 2400, "total_steps": 2545, "loss": 0.2732, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 8.114742688610789e-08, "epoch": 4.72, "percentage": 94.3, "elapsed_time": "1:01:57", "remaining_time": "0:03:44"}
+{"current_steps": 2400, "total_steps": 2545, "loss": null, "eval_loss": 0.33687904477119446, "predict_loss": null, "reward": null, "learning_rate": null, "epoch": 4.72, "percentage": 94.3, "elapsed_time": "1:01:57", "remaining_time": "0:03:44"}
+{"current_steps": 2410, "total_steps": 2545, "loss": 0.2598, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 7.036606005376701e-08, "epoch": 4.73, "percentage": 94.7, "elapsed_time": "1:02:51", "remaining_time": "0:03:31"}
+{"current_steps": 2420, "total_steps": 2545, "loss": 0.2599, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 6.034780129621664e-08, "epoch": 4.75, "percentage": 95.09, "elapsed_time": "1:03:05", "remaining_time": "0:03:15"}
+{"current_steps": 2430, "total_steps": 2545, "loss": 0.2699, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 5.1094201441417855e-08, "epoch": 4.77, "percentage": 95.48, "elapsed_time": "1:03:20", "remaining_time": "0:02:59"}
+{"current_steps": 2440, "total_steps": 2545, "loss": 0.2819, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.260669294801956e-08, "epoch": 4.79, "percentage": 95.87, "elapsed_time": "1:03:34", "remaining_time": "0:02:44"}
+{"current_steps": 2450, "total_steps": 2545, "loss": 0.2666, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.488658968361147e-08, "epoch": 4.81, "percentage": 96.27, "elapsed_time": "1:03:50", "remaining_time": "0:02:28"}
+{"current_steps": 2460, "total_steps": 2545, "loss": 0.2418, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.79350867213396e-08, "epoch": 4.83, "percentage": 96.66, "elapsed_time": "1:04:05", "remaining_time": "0:02:12"}
+{"current_steps": 2470, "total_steps": 2545, "loss": 0.1971, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.1753260154906973e-08, "epoch": 4.85, "percentage": 97.05, "elapsed_time": "1:04:19", "remaining_time": "0:01:57"}
+{"current_steps": 2480, "total_steps": 2545, "loss": 0.3358, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.6342066931995804e-08, "epoch": 4.87, "percentage": 97.45, "elapsed_time": "1:04:32", "remaining_time": "0:01:41"}
+{"current_steps": 2490, "total_steps": 2545, "loss": 0.2409, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.1702344706129298e-08, "epoch": 4.89, "percentage": 97.84, "elapsed_time": "1:04:48", "remaining_time": "0:01:25"}
+{"current_steps": 2500, "total_steps": 2545, "loss": 0.2549, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 7.834811707005952e-09, "epoch": 4.91, "percentage": 98.23, "elapsed_time": "1:05:01", "remaining_time": "0:01:10"}
+{"current_steps": 2510, "total_steps": 2545, "loss": 0.2957, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.740066629315143e-09, "epoch": 4.93, "percentage": 98.62, "elapsed_time": "1:05:14", "remaining_time": "0:00:54"}
+{"current_steps": 2520, "total_steps": 2545, "loss": 0.265, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.4185885400596076e-09, "epoch": 4.95, "percentage": 99.02, "elapsed_time": "1:05:28", "remaining_time": "0:00:38"}
+{"current_steps": 2530, "total_steps": 2545, "loss": 0.2817, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 8.707368043975318e-10, "epoch": 4.97, "percentage": 99.41, "elapsed_time": "1:05:40", "remaining_time": "0:00:23"}
+{"current_steps": 2540, "total_steps": 2545, "loss": 0.2591, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.675103000872377e-11, "epoch": 4.99, "percentage": 99.8, "elapsed_time": "1:05:55", "remaining_time": "0:00:07"}
+{"current_steps": 2545, "total_steps": 2545, "loss": null, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": null, "epoch": 5.0, "percentage": 100.0, "elapsed_time": "1:06:01", "remaining_time": "0:00:00"}
+{"current_steps": 90, "total_steps": 90, "loss": null, "eval_loss": 0.3370112180709839, "predict_loss": null, "reward": null, "learning_rate": null, "epoch": 5.0, "percentage": 100.0, "elapsed_time": "1:06:48", "remaining_time": "0:00:00"}
diff --git a/llama2_13b_peft/unit_conversion/trainer_state.json b/llama2_13b_peft/unit_conversion/trainer_state.json
new file mode 100644
index 0000000000000000000000000000000000000000..b573d8c23554d23ac12bc48d76d0ff028e051fb2
--- /dev/null
+++ b/llama2_13b_peft/unit_conversion/trainer_state.json
@@ -0,0 +1,1904 @@
+{
+ "best_metric": 0.3370112180709839,
+ "best_model_checkpoint": "ckpt/llama2_13b_fuze30_no_sys/unit_conversion_no_sys/checkpoint-2000",
+ "epoch": 5.0,
+ "eval_steps": 200,
+ "global_step": 2545,
+ "is_hyper_param_search": false,
+ "is_local_process_zero": true,
+ "is_world_process_zero": true,
+ "log_history": [
+ {
+ "epoch": 0.02,
+ "grad_norm": 1.0320725440979004,
+ "learning_rate": 5e-06,
+ "loss": 1.6608,
+ "step": 10
+ },
+ {
+ "epoch": 0.04,
+ "grad_norm": 1.4061148166656494,
+ "learning_rate": 1e-05,
+ "loss": 1.7201,
+ "step": 20
+ },
+ {
+ "epoch": 0.06,
+ "grad_norm": 1.7033771276474,
+ "learning_rate": 9.99961299962427e-06,
+ "loss": 1.6975,
+ "step": 30
+ },
+ {
+ "epoch": 0.08,
+ "grad_norm": 1.8334242105484009,
+ "learning_rate": 9.998452058404793e-06,
+ "loss": 1.3794,
+ "step": 40
+ },
+ {
+ "epoch": 0.1,
+ "grad_norm": 1.2908776998519897,
+ "learning_rate": 9.996517356055444e-06,
+ "loss": 1.0864,
+ "step": 50
+ },
+ {
+ "epoch": 0.12,
+ "grad_norm": 0.6934323906898499,
+ "learning_rate": 9.99380919206844e-06,
+ "loss": 0.6983,
+ "step": 60
+ },
+ {
+ "epoch": 0.14,
+ "grad_norm": 0.5397089719772339,
+ "learning_rate": 9.990327985667972e-06,
+ "loss": 0.564,
+ "step": 70
+ },
+ {
+ "epoch": 0.16,
+ "grad_norm": 0.7038566470146179,
+ "learning_rate": 9.986074275745314e-06,
+ "loss": 0.5864,
+ "step": 80
+ },
+ {
+ "epoch": 0.18,
+ "grad_norm": 0.6588059067726135,
+ "learning_rate": 9.981048720775401e-06,
+ "loss": 0.4707,
+ "step": 90
+ },
+ {
+ "epoch": 0.2,
+ "grad_norm": 0.6290227174758911,
+ "learning_rate": 9.975252098714898e-06,
+ "loss": 0.5636,
+ "step": 100
+ },
+ {
+ "epoch": 0.22,
+ "grad_norm": 0.5712002515792847,
+ "learning_rate": 9.968685306881772e-06,
+ "loss": 0.4544,
+ "step": 110
+ },
+ {
+ "epoch": 0.24,
+ "grad_norm": 0.47688916325569153,
+ "learning_rate": 9.961349361816384e-06,
+ "loss": 0.472,
+ "step": 120
+ },
+ {
+ "epoch": 0.26,
+ "grad_norm": 0.5468528866767883,
+ "learning_rate": 9.953245399124133e-06,
+ "loss": 0.4864,
+ "step": 130
+ },
+ {
+ "epoch": 0.28,
+ "grad_norm": 0.5000003576278687,
+ "learning_rate": 9.94437467329966e-06,
+ "loss": 0.4197,
+ "step": 140
+ },
+ {
+ "epoch": 0.29,
+ "grad_norm": 0.5887648463249207,
+ "learning_rate": 9.934738557532663e-06,
+ "loss": 0.4712,
+ "step": 150
+ },
+ {
+ "epoch": 0.31,
+ "grad_norm": 0.40465375781059265,
+ "learning_rate": 9.924338543495302e-06,
+ "loss": 0.3896,
+ "step": 160
+ },
+ {
+ "epoch": 0.33,
+ "grad_norm": 0.5504217743873596,
+ "learning_rate": 9.91317624111132e-06,
+ "loss": 0.5073,
+ "step": 170
+ },
+ {
+ "epoch": 0.35,
+ "grad_norm": 0.6431964635848999,
+ "learning_rate": 9.9012533783068e-06,
+ "loss": 0.4667,
+ "step": 180
+ },
+ {
+ "epoch": 0.37,
+ "grad_norm": 0.4865589737892151,
+ "learning_rate": 9.888571800742694e-06,
+ "loss": 0.4281,
+ "step": 190
+ },
+ {
+ "epoch": 0.39,
+ "grad_norm": 0.6109912395477295,
+ "learning_rate": 9.87513347152912e-06,
+ "loss": 0.5422,
+ "step": 200
+ },
+ {
+ "epoch": 0.39,
+ "eval_loss": 0.47923311591148376,
+ "eval_runtime": 39.2164,
+ "eval_samples_per_second": 73.26,
+ "eval_steps_per_second": 2.295,
+ "step": 200
+ },
+ {
+ "epoch": 0.41,
+ "grad_norm": 0.6530150175094604,
+ "learning_rate": 9.860940470921457e-06,
+ "loss": 0.4597,
+ "step": 210
+ },
+ {
+ "epoch": 0.43,
+ "grad_norm": 0.8157398700714111,
+ "learning_rate": 9.845994995998332e-06,
+ "loss": 0.4876,
+ "step": 220
+ },
+ {
+ "epoch": 0.45,
+ "grad_norm": 0.6635129451751709,
+ "learning_rate": 9.830299360321509e-06,
+ "loss": 0.5003,
+ "step": 230
+ },
+ {
+ "epoch": 0.47,
+ "grad_norm": 0.5162450671195984,
+ "learning_rate": 9.813855993577753e-06,
+ "loss": 0.3836,
+ "step": 240
+ },
+ {
+ "epoch": 0.49,
+ "grad_norm": 0.6863852143287659,
+ "learning_rate": 9.796667441202705e-06,
+ "loss": 0.4631,
+ "step": 250
+ },
+ {
+ "epoch": 0.51,
+ "grad_norm": 0.5410252809524536,
+ "learning_rate": 9.778736363986854e-06,
+ "loss": 0.4101,
+ "step": 260
+ },
+ {
+ "epoch": 0.53,
+ "grad_norm": 0.6589216589927673,
+ "learning_rate": 9.76006553766365e-06,
+ "loss": 0.4622,
+ "step": 270
+ },
+ {
+ "epoch": 0.55,
+ "grad_norm": 0.8363272547721863,
+ "learning_rate": 9.740657852479815e-06,
+ "loss": 0.4378,
+ "step": 280
+ },
+ {
+ "epoch": 0.57,
+ "grad_norm": 0.6521047949790955,
+ "learning_rate": 9.72051631274793e-06,
+ "loss": 0.3582,
+ "step": 290
+ },
+ {
+ "epoch": 0.59,
+ "grad_norm": 0.6268656253814697,
+ "learning_rate": 9.699644036381374e-06,
+ "loss": 0.45,
+ "step": 300
+ },
+ {
+ "epoch": 0.61,
+ "grad_norm": 0.8181853294372559,
+ "learning_rate": 9.678044254411665e-06,
+ "loss": 0.3289,
+ "step": 310
+ },
+ {
+ "epoch": 0.63,
+ "grad_norm": 0.7349339723587036,
+ "learning_rate": 9.655720310488298e-06,
+ "loss": 0.3618,
+ "step": 320
+ },
+ {
+ "epoch": 0.65,
+ "grad_norm": 0.8285214304924011,
+ "learning_rate": 9.632675660361148e-06,
+ "loss": 0.3579,
+ "step": 330
+ },
+ {
+ "epoch": 0.67,
+ "grad_norm": 1.1723264455795288,
+ "learning_rate": 9.60891387134552e-06,
+ "loss": 0.4039,
+ "step": 340
+ },
+ {
+ "epoch": 0.69,
+ "grad_norm": 0.8121698498725891,
+ "learning_rate": 9.58443862176992e-06,
+ "loss": 0.4668,
+ "step": 350
+ },
+ {
+ "epoch": 0.71,
+ "grad_norm": 0.6637731194496155,
+ "learning_rate": 9.559253700406663e-06,
+ "loss": 0.4174,
+ "step": 360
+ },
+ {
+ "epoch": 0.73,
+ "grad_norm": 1.7488420009613037,
+ "learning_rate": 9.533363005885362e-06,
+ "loss": 0.4081,
+ "step": 370
+ },
+ {
+ "epoch": 0.75,
+ "grad_norm": 0.805870771408081,
+ "learning_rate": 9.506770546089422e-06,
+ "loss": 0.3972,
+ "step": 380
+ },
+ {
+ "epoch": 0.77,
+ "grad_norm": 0.5776576399803162,
+ "learning_rate": 9.479480437535613e-06,
+ "loss": 0.3379,
+ "step": 390
+ },
+ {
+ "epoch": 0.79,
+ "grad_norm": 0.8170546889305115,
+ "learning_rate": 9.45149690473684e-06,
+ "loss": 0.319,
+ "step": 400
+ },
+ {
+ "epoch": 0.79,
+ "eval_loss": 0.41680005192756653,
+ "eval_runtime": 39.2491,
+ "eval_samples_per_second": 73.199,
+ "eval_steps_per_second": 2.293,
+ "step": 400
+ },
+ {
+ "epoch": 0.81,
+ "grad_norm": 0.8885582089424133,
+ "learning_rate": 9.422824279548189e-06,
+ "loss": 0.454,
+ "step": 410
+ },
+ {
+ "epoch": 0.83,
+ "grad_norm": 0.9332516193389893,
+ "learning_rate": 9.393467000496345e-06,
+ "loss": 0.3599,
+ "step": 420
+ },
+ {
+ "epoch": 0.84,
+ "grad_norm": 0.7631992697715759,
+ "learning_rate": 9.36342961209252e-06,
+ "loss": 0.3763,
+ "step": 430
+ },
+ {
+ "epoch": 0.86,
+ "grad_norm": 0.7638279795646667,
+ "learning_rate": 9.332716764128952e-06,
+ "loss": 0.3931,
+ "step": 440
+ },
+ {
+ "epoch": 0.88,
+ "grad_norm": 0.6363512873649597,
+ "learning_rate": 9.301333210959123e-06,
+ "loss": 0.3708,
+ "step": 450
+ },
+ {
+ "epoch": 0.9,
+ "grad_norm": 0.7823840379714966,
+ "learning_rate": 9.26928381076178e-06,
+ "loss": 0.3863,
+ "step": 460
+ },
+ {
+ "epoch": 0.92,
+ "grad_norm": 0.8903080821037292,
+ "learning_rate": 9.236573524788888e-06,
+ "loss": 0.3334,
+ "step": 470
+ },
+ {
+ "epoch": 0.94,
+ "grad_norm": 0.8487058281898499,
+ "learning_rate": 9.203207416597635e-06,
+ "loss": 0.4054,
+ "step": 480
+ },
+ {
+ "epoch": 0.96,
+ "grad_norm": 0.7155742049217224,
+ "learning_rate": 9.169190651266582e-06,
+ "loss": 0.3992,
+ "step": 490
+ },
+ {
+ "epoch": 0.98,
+ "grad_norm": 1.163004755973816,
+ "learning_rate": 9.134528494596116e-06,
+ "loss": 0.3113,
+ "step": 500
+ },
+ {
+ "epoch": 1.0,
+ "grad_norm": 0.9353289008140564,
+ "learning_rate": 9.099226312293296e-06,
+ "loss": 0.4078,
+ "step": 510
+ },
+ {
+ "epoch": 1.02,
+ "grad_norm": 0.8487387895584106,
+ "learning_rate": 9.063289569141251e-06,
+ "loss": 0.3958,
+ "step": 520
+ },
+ {
+ "epoch": 1.04,
+ "grad_norm": 0.8114432096481323,
+ "learning_rate": 9.026723828153224e-06,
+ "loss": 0.3513,
+ "step": 530
+ },
+ {
+ "epoch": 1.06,
+ "grad_norm": 0.7491832971572876,
+ "learning_rate": 8.98953474971141e-06,
+ "loss": 0.3857,
+ "step": 540
+ },
+ {
+ "epoch": 1.08,
+ "grad_norm": 0.7338570952415466,
+ "learning_rate": 8.951728090690743e-06,
+ "loss": 0.3646,
+ "step": 550
+ },
+ {
+ "epoch": 1.1,
+ "grad_norm": 0.8805945515632629,
+ "learning_rate": 8.913309703567722e-06,
+ "loss": 0.3052,
+ "step": 560
+ },
+ {
+ "epoch": 1.12,
+ "grad_norm": 1.1915472745895386,
+ "learning_rate": 8.87428553551445e-06,
+ "loss": 0.3649,
+ "step": 570
+ },
+ {
+ "epoch": 1.14,
+ "grad_norm": 1.0756891965866089,
+ "learning_rate": 8.834661627478003e-06,
+ "loss": 0.3371,
+ "step": 580
+ },
+ {
+ "epoch": 1.16,
+ "grad_norm": 0.8875855803489685,
+ "learning_rate": 8.794444113245302e-06,
+ "loss": 0.346,
+ "step": 590
+ },
+ {
+ "epoch": 1.18,
+ "grad_norm": 0.5038532018661499,
+ "learning_rate": 8.753639218493594e-06,
+ "loss": 0.3583,
+ "step": 600
+ },
+ {
+ "epoch": 1.18,
+ "eval_loss": 0.38733917474746704,
+ "eval_runtime": 39.2459,
+ "eval_samples_per_second": 73.205,
+ "eval_steps_per_second": 2.293,
+ "step": 600
+ },
+ {
+ "epoch": 1.2,
+ "grad_norm": 0.8062039613723755,
+ "learning_rate": 8.712253259826719e-06,
+ "loss": 0.3845,
+ "step": 610
+ },
+ {
+ "epoch": 1.22,
+ "grad_norm": 1.308142066001892,
+ "learning_rate": 8.670292643797302e-06,
+ "loss": 0.431,
+ "step": 620
+ },
+ {
+ "epoch": 1.24,
+ "grad_norm": 0.8702178597450256,
+ "learning_rate": 8.627763865915005e-06,
+ "loss": 0.3512,
+ "step": 630
+ },
+ {
+ "epoch": 1.26,
+ "grad_norm": 0.7164438962936401,
+ "learning_rate": 8.58467350964104e-06,
+ "loss": 0.3475,
+ "step": 640
+ },
+ {
+ "epoch": 1.28,
+ "grad_norm": 1.3485796451568604,
+ "learning_rate": 8.541028245369033e-06,
+ "loss": 0.3372,
+ "step": 650
+ },
+ {
+ "epoch": 1.3,
+ "grad_norm": 0.9907482266426086,
+ "learning_rate": 8.496834829392454e-06,
+ "loss": 0.3133,
+ "step": 660
+ },
+ {
+ "epoch": 1.32,
+ "grad_norm": 0.744209885597229,
+ "learning_rate": 8.452100102858734e-06,
+ "loss": 0.3274,
+ "step": 670
+ },
+ {
+ "epoch": 1.34,
+ "grad_norm": 1.4934322834014893,
+ "learning_rate": 8.40683099071027e-06,
+ "loss": 0.3771,
+ "step": 680
+ },
+ {
+ "epoch": 1.36,
+ "grad_norm": 0.9607213735580444,
+ "learning_rate": 8.361034500612421e-06,
+ "loss": 0.3636,
+ "step": 690
+ },
+ {
+ "epoch": 1.38,
+ "grad_norm": 0.31818854808807373,
+ "learning_rate": 8.31471772186874e-06,
+ "loss": 0.3026,
+ "step": 700
+ },
+ {
+ "epoch": 1.39,
+ "grad_norm": 1.0681700706481934,
+ "learning_rate": 8.267887824323537e-06,
+ "loss": 0.3075,
+ "step": 710
+ },
+ {
+ "epoch": 1.41,
+ "grad_norm": 0.7339015603065491,
+ "learning_rate": 8.22055205725199e-06,
+ "loss": 0.2668,
+ "step": 720
+ },
+ {
+ "epoch": 1.43,
+ "grad_norm": 0.9504737854003906,
+ "learning_rate": 8.172717748237955e-06,
+ "loss": 0.2809,
+ "step": 730
+ },
+ {
+ "epoch": 1.45,
+ "grad_norm": 1.125591516494751,
+ "learning_rate": 8.124392302039658e-06,
+ "loss": 0.2968,
+ "step": 740
+ },
+ {
+ "epoch": 1.47,
+ "grad_norm": 0.9738477468490601,
+ "learning_rate": 8.075583199443431e-06,
+ "loss": 0.3148,
+ "step": 750
+ },
+ {
+ "epoch": 1.49,
+ "grad_norm": 1.076797604560852,
+ "learning_rate": 8.026297996105694e-06,
+ "loss": 0.3266,
+ "step": 760
+ },
+ {
+ "epoch": 1.51,
+ "grad_norm": 0.5665436387062073,
+ "learning_rate": 7.97654432138333e-06,
+ "loss": 0.3143,
+ "step": 770
+ },
+ {
+ "epoch": 1.53,
+ "grad_norm": 0.8260796070098877,
+ "learning_rate": 7.926329877152665e-06,
+ "loss": 0.3853,
+ "step": 780
+ },
+ {
+ "epoch": 1.55,
+ "grad_norm": 0.9493719339370728,
+ "learning_rate": 7.875662436617211e-06,
+ "loss": 0.3196,
+ "step": 790
+ },
+ {
+ "epoch": 1.57,
+ "grad_norm": 1.1995518207550049,
+ "learning_rate": 7.824549843104378e-06,
+ "loss": 0.3048,
+ "step": 800
+ },
+ {
+ "epoch": 1.57,
+ "eval_loss": 0.369180291891098,
+ "eval_runtime": 39.26,
+ "eval_samples_per_second": 73.179,
+ "eval_steps_per_second": 2.292,
+ "step": 800
+ },
+ {
+ "epoch": 1.59,
+ "grad_norm": 0.8687970042228699,
+ "learning_rate": 7.773000008851323e-06,
+ "loss": 0.3529,
+ "step": 810
+ },
+ {
+ "epoch": 1.61,
+ "grad_norm": 1.1251832246780396,
+ "learning_rate": 7.721020913780137e-06,
+ "loss": 0.4047,
+ "step": 820
+ },
+ {
+ "epoch": 1.63,
+ "grad_norm": 1.1053804159164429,
+ "learning_rate": 7.668620604262548e-06,
+ "loss": 0.3268,
+ "step": 830
+ },
+ {
+ "epoch": 1.65,
+ "grad_norm": 0.8911699652671814,
+ "learning_rate": 7.615807191874349e-06,
+ "loss": 0.3198,
+ "step": 840
+ },
+ {
+ "epoch": 1.67,
+ "grad_norm": 1.4273747205734253,
+ "learning_rate": 7.56258885213971e-06,
+ "loss": 0.3856,
+ "step": 850
+ },
+ {
+ "epoch": 1.69,
+ "grad_norm": 0.9190964102745056,
+ "learning_rate": 7.50897382326562e-06,
+ "loss": 0.2915,
+ "step": 860
+ },
+ {
+ "epoch": 1.71,
+ "grad_norm": 0.6233330965042114,
+ "learning_rate": 7.454970404866612e-06,
+ "loss": 0.2955,
+ "step": 870
+ },
+ {
+ "epoch": 1.73,
+ "grad_norm": 1.0920981168746948,
+ "learning_rate": 7.400586956679965e-06,
+ "loss": 0.3256,
+ "step": 880
+ },
+ {
+ "epoch": 1.75,
+ "grad_norm": 1.1413133144378662,
+ "learning_rate": 7.345831897271636e-06,
+ "loss": 0.3329,
+ "step": 890
+ },
+ {
+ "epoch": 1.77,
+ "grad_norm": 0.7899062037467957,
+ "learning_rate": 7.2907137027330455e-06,
+ "loss": 0.4319,
+ "step": 900
+ },
+ {
+ "epoch": 1.79,
+ "grad_norm": 1.382123351097107,
+ "learning_rate": 7.235240905368997e-06,
+ "loss": 0.3695,
+ "step": 910
+ },
+ {
+ "epoch": 1.81,
+ "grad_norm": 1.0356751680374146,
+ "learning_rate": 7.179422092376856e-06,
+ "loss": 0.2896,
+ "step": 920
+ },
+ {
+ "epoch": 1.83,
+ "grad_norm": 1.0618700981140137,
+ "learning_rate": 7.123265904517264e-06,
+ "loss": 0.3428,
+ "step": 930
+ },
+ {
+ "epoch": 1.85,
+ "grad_norm": 1.2953617572784424,
+ "learning_rate": 7.066781034776546e-06,
+ "loss": 0.3375,
+ "step": 940
+ },
+ {
+ "epoch": 1.87,
+ "grad_norm": 0.5693756937980652,
+ "learning_rate": 7.009976227021018e-06,
+ "loss": 0.3072,
+ "step": 950
+ },
+ {
+ "epoch": 1.89,
+ "grad_norm": 1.263010859489441,
+ "learning_rate": 6.952860274643466e-06,
+ "loss": 0.3428,
+ "step": 960
+ },
+ {
+ "epoch": 1.91,
+ "grad_norm": 1.0063074827194214,
+ "learning_rate": 6.895442019201898e-06,
+ "loss": 0.374,
+ "step": 970
+ },
+ {
+ "epoch": 1.93,
+ "grad_norm": 1.2151877880096436,
+ "learning_rate": 6.837730349050886e-06,
+ "loss": 0.3584,
+ "step": 980
+ },
+ {
+ "epoch": 1.94,
+ "grad_norm": 0.551629364490509,
+ "learning_rate": 6.7797341979656454e-06,
+ "loss": 0.2854,
+ "step": 990
+ },
+ {
+ "epoch": 1.96,
+ "grad_norm": 0.9264897108078003,
+ "learning_rate": 6.721462543759077e-06,
+ "loss": 0.4185,
+ "step": 1000
+ },
+ {
+ "epoch": 1.96,
+ "eval_loss": 0.35499048233032227,
+ "eval_runtime": 39.2232,
+ "eval_samples_per_second": 73.247,
+ "eval_steps_per_second": 2.295,
+ "step": 1000
+ },
+ {
+ "epoch": 1.98,
+ "grad_norm": 1.1430025100708008,
+ "learning_rate": 6.6629244068920155e-06,
+ "loss": 0.3806,
+ "step": 1010
+ },
+ {
+ "epoch": 2.0,
+ "grad_norm": 1.0283466577529907,
+ "learning_rate": 6.6041288490768385e-06,
+ "loss": 0.2947,
+ "step": 1020
+ },
+ {
+ "epoch": 2.02,
+ "grad_norm": 0.675986647605896,
+ "learning_rate": 6.545084971874738e-06,
+ "loss": 0.2732,
+ "step": 1030
+ },
+ {
+ "epoch": 2.04,
+ "grad_norm": 0.8484103083610535,
+ "learning_rate": 6.485801915286776e-06,
+ "loss": 0.2952,
+ "step": 1040
+ },
+ {
+ "epoch": 2.06,
+ "grad_norm": 0.9744128584861755,
+ "learning_rate": 6.426288856339022e-06,
+ "loss": 0.2251,
+ "step": 1050
+ },
+ {
+ "epoch": 2.08,
+ "grad_norm": 1.0789810419082642,
+ "learning_rate": 6.366555007661949e-06,
+ "loss": 0.2822,
+ "step": 1060
+ },
+ {
+ "epoch": 2.1,
+ "grad_norm": 0.9231953620910645,
+ "learning_rate": 6.306609616064304e-06,
+ "loss": 0.278,
+ "step": 1070
+ },
+ {
+ "epoch": 2.12,
+ "grad_norm": 1.3422801494598389,
+ "learning_rate": 6.246461961101721e-06,
+ "loss": 0.2213,
+ "step": 1080
+ },
+ {
+ "epoch": 2.14,
+ "grad_norm": 0.9151216745376587,
+ "learning_rate": 6.186121353640223e-06,
+ "loss": 0.3001,
+ "step": 1090
+ },
+ {
+ "epoch": 2.16,
+ "grad_norm": 1.156983494758606,
+ "learning_rate": 6.125597134414919e-06,
+ "loss": 0.3357,
+ "step": 1100
+ },
+ {
+ "epoch": 2.18,
+ "grad_norm": 1.0727161169052124,
+ "learning_rate": 6.064898672584036e-06,
+ "loss": 0.3637,
+ "step": 1110
+ },
+ {
+ "epoch": 2.2,
+ "grad_norm": 1.6018846035003662,
+ "learning_rate": 6.004035364278593e-06,
+ "loss": 0.4342,
+ "step": 1120
+ },
+ {
+ "epoch": 2.22,
+ "grad_norm": 1.2866791486740112,
+ "learning_rate": 5.94301663114786e-06,
+ "loss": 0.3534,
+ "step": 1130
+ },
+ {
+ "epoch": 2.24,
+ "grad_norm": 1.2443523406982422,
+ "learning_rate": 5.881851918900897e-06,
+ "loss": 0.271,
+ "step": 1140
+ },
+ {
+ "epoch": 2.26,
+ "grad_norm": 1.3801060914993286,
+ "learning_rate": 5.820550695844354e-06,
+ "loss": 0.3774,
+ "step": 1150
+ },
+ {
+ "epoch": 2.28,
+ "grad_norm": 1.0210528373718262,
+ "learning_rate": 5.759122451416769e-06,
+ "loss": 0.2967,
+ "step": 1160
+ },
+ {
+ "epoch": 2.3,
+ "grad_norm": 1.162329077720642,
+ "learning_rate": 5.697576694719616e-06,
+ "loss": 0.2556,
+ "step": 1170
+ },
+ {
+ "epoch": 2.32,
+ "grad_norm": 0.8746941685676575,
+ "learning_rate": 5.635922953045278e-06,
+ "loss": 0.3435,
+ "step": 1180
+ },
+ {
+ "epoch": 2.34,
+ "grad_norm": 1.5948892831802368,
+ "learning_rate": 5.574170770402236e-06,
+ "loss": 0.3064,
+ "step": 1190
+ },
+ {
+ "epoch": 2.36,
+ "grad_norm": 1.2417634725570679,
+ "learning_rate": 5.512329706037643e-06,
+ "loss": 0.3737,
+ "step": 1200
+ },
+ {
+ "epoch": 2.36,
+ "eval_loss": 0.34871676564216614,
+ "eval_runtime": 39.2425,
+ "eval_samples_per_second": 73.211,
+ "eval_steps_per_second": 2.293,
+ "step": 1200
+ },
+ {
+ "epoch": 2.38,
+ "grad_norm": 1.1724433898925781,
+ "learning_rate": 5.4504093329575546e-06,
+ "loss": 0.3051,
+ "step": 1210
+ },
+ {
+ "epoch": 2.4,
+ "grad_norm": 1.51445734500885,
+ "learning_rate": 5.388419236445033e-06,
+ "loss": 0.3564,
+ "step": 1220
+ },
+ {
+ "epoch": 2.42,
+ "grad_norm": 1.1924351453781128,
+ "learning_rate": 5.3263690125763316e-06,
+ "loss": 0.3243,
+ "step": 1230
+ },
+ {
+ "epoch": 2.44,
+ "grad_norm": 1.434472918510437,
+ "learning_rate": 5.264268266735432e-06,
+ "loss": 0.2923,
+ "step": 1240
+ },
+ {
+ "epoch": 2.46,
+ "grad_norm": 1.9453471899032593,
+ "learning_rate": 5.202126612127125e-06,
+ "loss": 0.3074,
+ "step": 1250
+ },
+ {
+ "epoch": 2.48,
+ "grad_norm": 1.517500877380371,
+ "learning_rate": 5.139953668288883e-06,
+ "loss": 0.3111,
+ "step": 1260
+ },
+ {
+ "epoch": 2.5,
+ "grad_norm": 2.1911709308624268,
+ "learning_rate": 5.077759059601756e-06,
+ "loss": 0.3539,
+ "step": 1270
+ },
+ {
+ "epoch": 2.51,
+ "grad_norm": 1.6386833190917969,
+ "learning_rate": 5.015552413800514e-06,
+ "loss": 0.2223,
+ "step": 1280
+ },
+ {
+ "epoch": 2.53,
+ "grad_norm": 0.7657915949821472,
+ "learning_rate": 4.953343360483281e-06,
+ "loss": 0.2714,
+ "step": 1290
+ },
+ {
+ "epoch": 2.55,
+ "grad_norm": 1.4364550113677979,
+ "learning_rate": 4.8911415296208555e-06,
+ "loss": 0.2568,
+ "step": 1300
+ },
+ {
+ "epoch": 2.57,
+ "grad_norm": 1.270623803138733,
+ "learning_rate": 4.828956550066006e-06,
+ "loss": 0.2363,
+ "step": 1310
+ },
+ {
+ "epoch": 2.59,
+ "grad_norm": 1.8665142059326172,
+ "learning_rate": 4.766798048062913e-06,
+ "loss": 0.4116,
+ "step": 1320
+ },
+ {
+ "epoch": 2.61,
+ "grad_norm": 1.2894102334976196,
+ "learning_rate": 4.704675645757028e-06,
+ "loss": 0.3128,
+ "step": 1330
+ },
+ {
+ "epoch": 2.63,
+ "grad_norm": 1.0780175924301147,
+ "learning_rate": 4.642598959705565e-06,
+ "loss": 0.2368,
+ "step": 1340
+ },
+ {
+ "epoch": 2.65,
+ "grad_norm": 0.9098504185676575,
+ "learning_rate": 4.580577599388854e-06,
+ "loss": 0.3453,
+ "step": 1350
+ },
+ {
+ "epoch": 2.67,
+ "grad_norm": 1.0072888135910034,
+ "learning_rate": 4.518621165722794e-06,
+ "loss": 0.2893,
+ "step": 1360
+ },
+ {
+ "epoch": 2.69,
+ "grad_norm": 0.721864640712738,
+ "learning_rate": 4.456739249572628e-06,
+ "loss": 0.3422,
+ "step": 1370
+ },
+ {
+ "epoch": 2.71,
+ "grad_norm": 1.9018405675888062,
+ "learning_rate": 4.394941430268275e-06,
+ "loss": 0.3076,
+ "step": 1380
+ },
+ {
+ "epoch": 2.73,
+ "grad_norm": 0.8948503732681274,
+ "learning_rate": 4.333237274121453e-06,
+ "loss": 0.3504,
+ "step": 1390
+ },
+ {
+ "epoch": 2.75,
+ "grad_norm": 2.278956413269043,
+ "learning_rate": 4.271636332944806e-06,
+ "loss": 0.2418,
+ "step": 1400
+ },
+ {
+ "epoch": 2.75,
+ "eval_loss": 0.3422459363937378,
+ "eval_runtime": 39.2204,
+ "eval_samples_per_second": 73.253,
+ "eval_steps_per_second": 2.295,
+ "step": 1400
+ },
+ {
+ "epoch": 2.77,
+ "grad_norm": 1.2129451036453247,
+ "learning_rate": 4.210148142573287e-06,
+ "loss": 0.3114,
+ "step": 1410
+ },
+ {
+ "epoch": 2.79,
+ "grad_norm": 1.1083780527114868,
+ "learning_rate": 4.148782221388007e-06,
+ "loss": 0.3683,
+ "step": 1420
+ },
+ {
+ "epoch": 2.81,
+ "grad_norm": 1.3645988702774048,
+ "learning_rate": 4.087548068842787e-06,
+ "loss": 0.3036,
+ "step": 1430
+ },
+ {
+ "epoch": 2.83,
+ "grad_norm": 0.9339216947555542,
+ "learning_rate": 4.026455163993645e-06,
+ "loss": 0.3015,
+ "step": 1440
+ },
+ {
+ "epoch": 2.85,
+ "grad_norm": 1.083004355430603,
+ "learning_rate": 3.965512964031435e-06,
+ "loss": 0.2794,
+ "step": 1450
+ },
+ {
+ "epoch": 2.87,
+ "grad_norm": 1.1303693056106567,
+ "learning_rate": 3.904730902817867e-06,
+ "loss": 0.2911,
+ "step": 1460
+ },
+ {
+ "epoch": 2.89,
+ "grad_norm": 1.3334907293319702,
+ "learning_rate": 3.844118389425154e-06,
+ "loss": 0.2511,
+ "step": 1470
+ },
+ {
+ "epoch": 2.91,
+ "grad_norm": 2.2911875247955322,
+ "learning_rate": 3.7836848066794763e-06,
+ "loss": 0.3791,
+ "step": 1480
+ },
+ {
+ "epoch": 2.93,
+ "grad_norm": 1.7196086645126343,
+ "learning_rate": 3.7234395097085298e-06,
+ "loss": 0.3396,
+ "step": 1490
+ },
+ {
+ "epoch": 2.95,
+ "grad_norm": 0.5811883211135864,
+ "learning_rate": 3.663391824493336e-06,
+ "loss": 0.1901,
+ "step": 1500
+ },
+ {
+ "epoch": 2.97,
+ "grad_norm": 2.1780526638031006,
+ "learning_rate": 3.6035510464245937e-06,
+ "loss": 0.3611,
+ "step": 1510
+ },
+ {
+ "epoch": 2.99,
+ "grad_norm": 2.1352715492248535,
+ "learning_rate": 3.5439264388637407e-06,
+ "loss": 0.2753,
+ "step": 1520
+ },
+ {
+ "epoch": 3.01,
+ "grad_norm": 0.9786732792854309,
+ "learning_rate": 3.4845272317089882e-06,
+ "loss": 0.2861,
+ "step": 1530
+ },
+ {
+ "epoch": 3.03,
+ "grad_norm": 1.4434765577316284,
+ "learning_rate": 3.4253626199665314e-06,
+ "loss": 0.2989,
+ "step": 1540
+ },
+ {
+ "epoch": 3.05,
+ "grad_norm": 2.0777690410614014,
+ "learning_rate": 3.3664417623271594e-06,
+ "loss": 0.3478,
+ "step": 1550
+ },
+ {
+ "epoch": 3.06,
+ "grad_norm": 1.3415099382400513,
+ "learning_rate": 3.3077737797484923e-06,
+ "loss": 0.2654,
+ "step": 1560
+ },
+ {
+ "epoch": 3.08,
+ "grad_norm": 0.7920995950698853,
+ "learning_rate": 3.249367754043047e-06,
+ "loss": 0.2063,
+ "step": 1570
+ },
+ {
+ "epoch": 3.1,
+ "grad_norm": 1.4506205320358276,
+ "learning_rate": 3.1912327264723843e-06,
+ "loss": 0.3329,
+ "step": 1580
+ },
+ {
+ "epoch": 3.12,
+ "grad_norm": 1.1375677585601807,
+ "learning_rate": 3.133377696347506e-06,
+ "loss": 0.3934,
+ "step": 1590
+ },
+ {
+ "epoch": 3.14,
+ "grad_norm": 1.2222238779067993,
+ "learning_rate": 3.075811619635774e-06,
+ "loss": 0.2528,
+ "step": 1600
+ },
+ {
+ "epoch": 3.14,
+ "eval_loss": 0.33900028467178345,
+ "eval_runtime": 39.2361,
+ "eval_samples_per_second": 73.223,
+ "eval_steps_per_second": 2.294,
+ "step": 1600
+ },
+ {
+ "epoch": 3.16,
+ "grad_norm": 1.0245050191879272,
+ "learning_rate": 3.0185434075745124e-06,
+ "loss": 0.247,
+ "step": 1610
+ },
+ {
+ "epoch": 3.18,
+ "grad_norm": 1.194373607635498,
+ "learning_rate": 2.961581925291557e-06,
+ "loss": 0.313,
+ "step": 1620
+ },
+ {
+ "epoch": 3.2,
+ "grad_norm": 1.9943459033966064,
+ "learning_rate": 2.9049359904329234e-06,
+ "loss": 0.3331,
+ "step": 1630
+ },
+ {
+ "epoch": 3.22,
+ "grad_norm": 1.5466620922088623,
+ "learning_rate": 2.8486143717978448e-06,
+ "loss": 0.2684,
+ "step": 1640
+ },
+ {
+ "epoch": 3.24,
+ "grad_norm": 1.1305476427078247,
+ "learning_rate": 2.792625787981349e-06,
+ "loss": 0.2743,
+ "step": 1650
+ },
+ {
+ "epoch": 3.26,
+ "grad_norm": 1.368371605873108,
+ "learning_rate": 2.736978906024626e-06,
+ "loss": 0.3512,
+ "step": 1660
+ },
+ {
+ "epoch": 3.28,
+ "grad_norm": 1.7816555500030518,
+ "learning_rate": 2.6816823400733628e-06,
+ "loss": 0.2919,
+ "step": 1670
+ },
+ {
+ "epoch": 3.3,
+ "grad_norm": 1.249085783958435,
+ "learning_rate": 2.626744650044284e-06,
+ "loss": 0.322,
+ "step": 1680
+ },
+ {
+ "epoch": 3.32,
+ "grad_norm": 1.6129003763198853,
+ "learning_rate": 2.572174340300061e-06,
+ "loss": 0.2349,
+ "step": 1690
+ },
+ {
+ "epoch": 3.34,
+ "grad_norm": 2.0825748443603516,
+ "learning_rate": 2.5179798583328415e-06,
+ "loss": 0.2368,
+ "step": 1700
+ },
+ {
+ "epoch": 3.36,
+ "grad_norm": 0.43531525135040283,
+ "learning_rate": 2.4641695934565827e-06,
+ "loss": 0.3711,
+ "step": 1710
+ },
+ {
+ "epoch": 3.38,
+ "grad_norm": 1.051423192024231,
+ "learning_rate": 2.410751875508373e-06,
+ "loss": 0.2323,
+ "step": 1720
+ },
+ {
+ "epoch": 3.4,
+ "grad_norm": 1.935787320137024,
+ "learning_rate": 2.3577349735589787e-06,
+ "loss": 0.3185,
+ "step": 1730
+ },
+ {
+ "epoch": 3.42,
+ "grad_norm": 1.971083641052246,
+ "learning_rate": 2.3051270946327887e-06,
+ "loss": 0.2479,
+ "step": 1740
+ },
+ {
+ "epoch": 3.44,
+ "grad_norm": 1.8540164232254028,
+ "learning_rate": 2.2529363824373707e-06,
+ "loss": 0.3738,
+ "step": 1750
+ },
+ {
+ "epoch": 3.46,
+ "grad_norm": 2.1510169506073,
+ "learning_rate": 2.2011709161028156e-06,
+ "loss": 0.3481,
+ "step": 1760
+ },
+ {
+ "epoch": 3.48,
+ "grad_norm": 1.3573346138000488,
+ "learning_rate": 2.149838708931087e-06,
+ "loss": 0.2863,
+ "step": 1770
+ },
+ {
+ "epoch": 3.5,
+ "grad_norm": 0.8732242584228516,
+ "learning_rate": 2.0989477071555757e-06,
+ "loss": 0.2415,
+ "step": 1780
+ },
+ {
+ "epoch": 3.52,
+ "grad_norm": 1.0924676656723022,
+ "learning_rate": 2.0485057887110026e-06,
+ "loss": 0.2989,
+ "step": 1790
+ },
+ {
+ "epoch": 3.54,
+ "grad_norm": 1.789581298828125,
+ "learning_rate": 1.998520762013923e-06,
+ "loss": 0.3192,
+ "step": 1800
+ },
+ {
+ "epoch": 3.54,
+ "eval_loss": 0.33926254510879517,
+ "eval_runtime": 39.2683,
+ "eval_samples_per_second": 73.163,
+ "eval_steps_per_second": 2.292,
+ "step": 1800
+ },
+ {
+ "epoch": 3.56,
+ "grad_norm": 1.4803309440612793,
+ "learning_rate": 1.949000364753985e-06,
+ "loss": 0.2484,
+ "step": 1810
+ },
+ {
+ "epoch": 3.58,
+ "grad_norm": 2.3573765754699707,
+ "learning_rate": 1.8999522626961254e-06,
+ "loss": 0.2761,
+ "step": 1820
+ },
+ {
+ "epoch": 3.6,
+ "grad_norm": 1.3212318420410156,
+ "learning_rate": 1.851384048493914e-06,
+ "loss": 0.2972,
+ "step": 1830
+ },
+ {
+ "epoch": 3.61,
+ "grad_norm": 1.6966694593429565,
+ "learning_rate": 1.8033032405142075e-06,
+ "loss": 0.2279,
+ "step": 1840
+ },
+ {
+ "epoch": 3.63,
+ "grad_norm": 1.9742337465286255,
+ "learning_rate": 1.7557172816733104e-06,
+ "loss": 0.2662,
+ "step": 1850
+ },
+ {
+ "epoch": 3.65,
+ "grad_norm": 1.3778648376464844,
+ "learning_rate": 1.7086335382848012e-06,
+ "loss": 0.3054,
+ "step": 1860
+ },
+ {
+ "epoch": 3.67,
+ "grad_norm": 1.8162543773651123,
+ "learning_rate": 1.6620592989192318e-06,
+ "loss": 0.3051,
+ "step": 1870
+ },
+ {
+ "epoch": 3.69,
+ "grad_norm": 1.5290098190307617,
+ "learning_rate": 1.6160017732758588e-06,
+ "loss": 0.2765,
+ "step": 1880
+ },
+ {
+ "epoch": 3.71,
+ "grad_norm": 1.4022248983383179,
+ "learning_rate": 1.5704680910665721e-06,
+ "loss": 0.2839,
+ "step": 1890
+ },
+ {
+ "epoch": 3.73,
+ "grad_norm": 1.5009647607803345,
+ "learning_rate": 1.5254653009122206e-06,
+ "loss": 0.2527,
+ "step": 1900
+ },
+ {
+ "epoch": 3.75,
+ "grad_norm": 1.65267813205719,
+ "learning_rate": 1.4810003692514829e-06,
+ "loss": 0.2268,
+ "step": 1910
+ },
+ {
+ "epoch": 3.77,
+ "grad_norm": 1.7568740844726562,
+ "learning_rate": 1.4370801792624656e-06,
+ "loss": 0.3011,
+ "step": 1920
+ },
+ {
+ "epoch": 3.79,
+ "grad_norm": 1.2601398229599,
+ "learning_rate": 1.3937115297971788e-06,
+ "loss": 0.2938,
+ "step": 1930
+ },
+ {
+ "epoch": 3.81,
+ "grad_norm": 1.4082847833633423,
+ "learning_rate": 1.3509011343290767e-06,
+ "loss": 0.2375,
+ "step": 1940
+ },
+ {
+ "epoch": 3.83,
+ "grad_norm": 1.3604422807693481,
+ "learning_rate": 1.3086556199138117e-06,
+ "loss": 0.2748,
+ "step": 1950
+ },
+ {
+ "epoch": 3.85,
+ "grad_norm": 1.3298903703689575,
+ "learning_rate": 1.2669815261633666e-06,
+ "loss": 0.2703,
+ "step": 1960
+ },
+ {
+ "epoch": 3.87,
+ "grad_norm": 1.3025224208831787,
+ "learning_rate": 1.225885304233716e-06,
+ "loss": 0.2331,
+ "step": 1970
+ },
+ {
+ "epoch": 3.89,
+ "grad_norm": 0.9569131135940552,
+ "learning_rate": 1.1853733158261894e-06,
+ "loss": 0.3299,
+ "step": 1980
+ },
+ {
+ "epoch": 3.91,
+ "grad_norm": 1.0660338401794434,
+ "learning_rate": 1.1454518322026836e-06,
+ "loss": 0.2159,
+ "step": 1990
+ },
+ {
+ "epoch": 3.93,
+ "grad_norm": 0.9487476944923401,
+ "learning_rate": 1.1061270332148626e-06,
+ "loss": 0.2834,
+ "step": 2000
+ },
+ {
+ "epoch": 3.93,
+ "eval_loss": 0.3370112180709839,
+ "eval_runtime": 39.2536,
+ "eval_samples_per_second": 73.191,
+ "eval_steps_per_second": 2.293,
+ "step": 2000
+ },
+ {
+ "epoch": 3.95,
+ "grad_norm": 0.9943105578422546,
+ "learning_rate": 1.06740500634752e-06,
+ "loss": 0.2315,
+ "step": 2010
+ },
+ {
+ "epoch": 3.97,
+ "grad_norm": 1.6915420293807983,
+ "learning_rate": 1.0292917457762325e-06,
+ "loss": 0.32,
+ "step": 2020
+ },
+ {
+ "epoch": 3.99,
+ "grad_norm": 1.3354073762893677,
+ "learning_rate": 9.917931514394674e-07,
+ "loss": 0.3086,
+ "step": 2030
+ },
+ {
+ "epoch": 4.01,
+ "grad_norm": 1.4191135168075562,
+ "learning_rate": 9.549150281252633e-07,
+ "loss": 0.327,
+ "step": 2040
+ },
+ {
+ "epoch": 4.03,
+ "grad_norm": 1.4209463596343994,
+ "learning_rate": 9.186630845726497e-07,
+ "loss": 0.2789,
+ "step": 2050
+ },
+ {
+ "epoch": 4.05,
+ "grad_norm": 1.3217524290084839,
+ "learning_rate": 8.830429325879397e-07,
+ "loss": 0.305,
+ "step": 2060
+ },
+ {
+ "epoch": 4.07,
+ "grad_norm": 0.6172815561294556,
+ "learning_rate": 8.480600861760124e-07,
+ "loss": 0.2732,
+ "step": 2070
+ },
+ {
+ "epoch": 4.09,
+ "grad_norm": 0.9870301485061646,
+ "learning_rate": 8.137199606867507e-07,
+ "loss": 0.2897,
+ "step": 2080
+ },
+ {
+ "epoch": 4.11,
+ "grad_norm": 0.8269962072372437,
+ "learning_rate": 7.800278719767407e-07,
+ "loss": 0.3181,
+ "step": 2090
+ },
+ {
+ "epoch": 4.13,
+ "grad_norm": 0.7305634021759033,
+ "learning_rate": 7.469890355863785e-07,
+ "loss": 0.2144,
+ "step": 2100
+ },
+ {
+ "epoch": 4.15,
+ "grad_norm": 0.5976698398590088,
+ "learning_rate": 7.146085659325025e-07,
+ "loss": 0.294,
+ "step": 2110
+ },
+ {
+ "epoch": 4.17,
+ "grad_norm": 1.456742763519287,
+ "learning_rate": 6.828914755166826e-07,
+ "loss": 0.2729,
+ "step": 2120
+ },
+ {
+ "epoch": 4.18,
+ "grad_norm": 1.1731284856796265,
+ "learning_rate": 6.518426741492823e-07,
+ "loss": 0.2509,
+ "step": 2130
+ },
+ {
+ "epoch": 4.2,
+ "grad_norm": 1.3907181024551392,
+ "learning_rate": 6.214669681894181e-07,
+ "loss": 0.2271,
+ "step": 2140
+ },
+ {
+ "epoch": 4.22,
+ "grad_norm": 1.0336577892303467,
+ "learning_rate": 5.917690598009385e-07,
+ "loss": 0.2703,
+ "step": 2150
+ },
+ {
+ "epoch": 4.24,
+ "grad_norm": 0.9870587587356567,
+ "learning_rate": 5.62753546224526e-07,
+ "loss": 0.2339,
+ "step": 2160
+ },
+ {
+ "epoch": 4.26,
+ "grad_norm": 1.3517531156539917,
+ "learning_rate": 5.344249190660427e-07,
+ "loss": 0.3029,
+ "step": 2170
+ },
+ {
+ "epoch": 4.28,
+ "grad_norm": 1.3691924810409546,
+ "learning_rate": 5.067875636012304e-07,
+ "loss": 0.3205,
+ "step": 2180
+ },
+ {
+ "epoch": 4.3,
+ "grad_norm": 1.9304887056350708,
+ "learning_rate": 4.798457580968674e-07,
+ "loss": 0.2875,
+ "step": 2190
+ },
+ {
+ "epoch": 4.32,
+ "grad_norm": 1.0140875577926636,
+ "learning_rate": 4.536036731484972e-07,
+ "loss": 0.3612,
+ "step": 2200
+ },
+ {
+ "epoch": 4.32,
+ "eval_loss": 0.337531715631485,
+ "eval_runtime": 39.2368,
+ "eval_samples_per_second": 73.222,
+ "eval_steps_per_second": 2.294,
+ "step": 2200
+ },
+ {
+ "epoch": 4.34,
+ "grad_norm": 2.371439218521118,
+ "learning_rate": 4.280653710348126e-07,
+ "loss": 0.2825,
+ "step": 2210
+ },
+ {
+ "epoch": 4.36,
+ "grad_norm": 1.9617916345596313,
+ "learning_rate": 4.03234805088818e-07,
+ "loss": 0.323,
+ "step": 2220
+ },
+ {
+ "epoch": 4.38,
+ "grad_norm": 1.9243499040603638,
+ "learning_rate": 3.7911581908585626e-07,
+ "loss": 0.2784,
+ "step": 2230
+ },
+ {
+ "epoch": 4.4,
+ "grad_norm": 1.2725778818130493,
+ "learning_rate": 3.5571214664858356e-07,
+ "loss": 0.2632,
+ "step": 2240
+ },
+ {
+ "epoch": 4.42,
+ "grad_norm": 1.0478254556655884,
+ "learning_rate": 3.330274106690112e-07,
+ "loss": 0.2192,
+ "step": 2250
+ },
+ {
+ "epoch": 4.44,
+ "grad_norm": 0.705704927444458,
+ "learning_rate": 3.110651227476774e-07,
+ "loss": 0.2647,
+ "step": 2260
+ },
+ {
+ "epoch": 4.46,
+ "grad_norm": 1.8936996459960938,
+ "learning_rate": 2.8982868265005457e-07,
+ "loss": 0.2897,
+ "step": 2270
+ },
+ {
+ "epoch": 4.48,
+ "grad_norm": 1.3405539989471436,
+ "learning_rate": 2.693213777802606e-07,
+ "loss": 0.3023,
+ "step": 2280
+ },
+ {
+ "epoch": 4.5,
+ "grad_norm": 1.2517961263656616,
+ "learning_rate": 2.495463826721711e-07,
+ "loss": 0.2419,
+ "step": 2290
+ },
+ {
+ "epoch": 4.52,
+ "grad_norm": 2.112408399581909,
+ "learning_rate": 2.305067584980014e-07,
+ "loss": 0.3104,
+ "step": 2300
+ },
+ {
+ "epoch": 4.54,
+ "grad_norm": 1.2434989213943481,
+ "learning_rate": 2.1220545259443558e-07,
+ "loss": 0.328,
+ "step": 2310
+ },
+ {
+ "epoch": 4.56,
+ "grad_norm": 1.478872299194336,
+ "learning_rate": 1.9464529800637731e-07,
+ "loss": 0.307,
+ "step": 2320
+ },
+ {
+ "epoch": 4.58,
+ "grad_norm": 1.822096347808838,
+ "learning_rate": 1.7782901304839617e-07,
+ "loss": 0.2437,
+ "step": 2330
+ },
+ {
+ "epoch": 4.6,
+ "grad_norm": 1.8473854064941406,
+ "learning_rate": 1.617592008839308e-07,
+ "loss": 0.2862,
+ "step": 2340
+ },
+ {
+ "epoch": 4.62,
+ "grad_norm": 1.2884219884872437,
+ "learning_rate": 1.4643834912232035e-07,
+ "loss": 0.2901,
+ "step": 2350
+ },
+ {
+ "epoch": 4.64,
+ "grad_norm": 1.3200654983520508,
+ "learning_rate": 1.3186882943371892e-07,
+ "loss": 0.2482,
+ "step": 2360
+ },
+ {
+ "epoch": 4.66,
+ "grad_norm": 1.8138701915740967,
+ "learning_rate": 1.1805289718196499e-07,
+ "loss": 0.2957,
+ "step": 2370
+ },
+ {
+ "epoch": 4.68,
+ "grad_norm": 1.3670308589935303,
+ "learning_rate": 1.0499269107544674e-07,
+ "loss": 0.3179,
+ "step": 2380
+ },
+ {
+ "epoch": 4.7,
+ "grad_norm": 1.4334827661514282,
+ "learning_rate": 9.269023283603295e-08,
+ "loss": 0.277,
+ "step": 2390
+ },
+ {
+ "epoch": 4.72,
+ "grad_norm": 1.5253217220306396,
+ "learning_rate": 8.114742688610789e-08,
+ "loss": 0.2732,
+ "step": 2400
+ },
+ {
+ "epoch": 4.72,
+ "eval_loss": 0.33687904477119446,
+ "eval_runtime": 39.2327,
+ "eval_samples_per_second": 73.23,
+ "eval_steps_per_second": 2.294,
+ "step": 2400
+ },
+ {
+ "epoch": 4.73,
+ "grad_norm": 1.0200579166412354,
+ "learning_rate": 7.036606005376701e-08,
+ "loss": 0.2598,
+ "step": 2410
+ },
+ {
+ "epoch": 4.75,
+ "grad_norm": 1.607457160949707,
+ "learning_rate": 6.034780129621664e-08,
+ "loss": 0.2599,
+ "step": 2420
+ },
+ {
+ "epoch": 4.77,
+ "grad_norm": 1.8155800104141235,
+ "learning_rate": 5.1094201441417855e-08,
+ "loss": 0.2699,
+ "step": 2430
+ },
+ {
+ "epoch": 4.79,
+ "grad_norm": 0.9994600415229797,
+ "learning_rate": 4.260669294801956e-08,
+ "loss": 0.2819,
+ "step": 2440
+ },
+ {
+ "epoch": 4.81,
+ "grad_norm": 2.0200681686401367,
+ "learning_rate": 3.488658968361147e-08,
+ "loss": 0.2666,
+ "step": 2450
+ },
+ {
+ "epoch": 4.83,
+ "grad_norm": 1.5585851669311523,
+ "learning_rate": 2.79350867213396e-08,
+ "loss": 0.2418,
+ "step": 2460
+ },
+ {
+ "epoch": 4.85,
+ "grad_norm": 1.5332939624786377,
+ "learning_rate": 2.1753260154906973e-08,
+ "loss": 0.1971,
+ "step": 2470
+ },
+ {
+ "epoch": 4.87,
+ "grad_norm": 2.014511823654175,
+ "learning_rate": 1.6342066931995804e-08,
+ "loss": 0.3358,
+ "step": 2480
+ },
+ {
+ "epoch": 4.89,
+ "grad_norm": 1.5106168985366821,
+ "learning_rate": 1.1702344706129298e-08,
+ "loss": 0.2409,
+ "step": 2490
+ },
+ {
+ "epoch": 4.91,
+ "grad_norm": 1.561906337738037,
+ "learning_rate": 7.834811707005952e-09,
+ "loss": 0.2549,
+ "step": 2500
+ },
+ {
+ "epoch": 4.93,
+ "grad_norm": 1.239657998085022,
+ "learning_rate": 4.740066629315143e-09,
+ "loss": 0.2957,
+ "step": 2510
+ },
+ {
+ "epoch": 4.95,
+ "grad_norm": 1.6581403017044067,
+ "learning_rate": 2.4185885400596076e-09,
+ "loss": 0.265,
+ "step": 2520
+ },
+ {
+ "epoch": 4.97,
+ "grad_norm": 1.327383279800415,
+ "learning_rate": 8.707368043975318e-10,
+ "loss": 0.2817,
+ "step": 2530
+ },
+ {
+ "epoch": 4.99,
+ "grad_norm": 1.1941030025482178,
+ "learning_rate": 9.675103000872377e-11,
+ "loss": 0.2591,
+ "step": 2540
+ },
+ {
+ "epoch": 5.0,
+ "step": 2545,
+ "total_flos": 1.1692495178962043e+18,
+ "train_loss": 0.3501229747105207,
+ "train_runtime": 3965.184,
+ "train_samples_per_second": 20.526,
+ "train_steps_per_second": 0.642
+ }
+ ],
+ "logging_steps": 10,
+ "max_steps": 2545,
+ "num_input_tokens_seen": 0,
+ "num_train_epochs": 5,
+ "save_steps": 1000,
+ "total_flos": 1.1692495178962043e+18,
+ "train_batch_size": 16,
+ "trial_name": null,
+ "trial_params": null
+}
diff --git a/llama2_13b_peft/unit_conversion/training_args.bin b/llama2_13b_peft/unit_conversion/training_args.bin
new file mode 100644
index 0000000000000000000000000000000000000000..11796cc9d020e6f2ef8a8c9f70558557b6567a57
--- /dev/null
+++ b/llama2_13b_peft/unit_conversion/training_args.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:94dbc1a54d5022eafa983608a03720d70f2b0138f9618bcd0f3687c51efaaa6c
+size 5112
diff --git a/llama2_13b_peft/unit_conversion/training_eval_loss.png b/llama2_13b_peft/unit_conversion/training_eval_loss.png
new file mode 100644
index 0000000000000000000000000000000000000000..aa7a09f258caf41fb2b8978994441b1bd0856703
Binary files /dev/null and b/llama2_13b_peft/unit_conversion/training_eval_loss.png differ
diff --git a/llama2_13b_peft/unit_conversion/training_loss.png b/llama2_13b_peft/unit_conversion/training_loss.png
new file mode 100644
index 0000000000000000000000000000000000000000..97803724eaa94fd2971fd2b69f46568dfd5e3f92
Binary files /dev/null and b/llama2_13b_peft/unit_conversion/training_loss.png differ
diff --git a/llama2_13b_peft/winowhy/README.md b/llama2_13b_peft/winowhy/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..db2e51adfbc94385dd649b3960b6a4af9a4ebfb7
--- /dev/null
+++ b/llama2_13b_peft/winowhy/README.md
@@ -0,0 +1,71 @@
+---
+license: other
+library_name: peft
+tags:
+- llama-factory
+- lora
+- generated_from_trainer
+base_model: /data1/model/llama2/meta-llama/Llama2-13b
+model-index:
+- name: winowhy_no_sys
+ results: []
+---
+
+
+
+# winowhy_no_sys
+
+This model is a fine-tuned version of the base model stored at the local path `/data1/model/llama2/meta-llama/Llama2-13b`, trained on the winowhy_no_sys dataset.
+It achieves the following results on the evaluation set:
+- Loss: 0.1856
+
+## Model description
+
+More information needed
+
+## Intended uses & limitations
+
+More information needed
+
+## Training and evaluation data
+
+More information needed
+
+## Training procedure
+
+### Training hyperparameters
+
+The following hyperparameters were used during training:
+- learning_rate: 5e-05
+- train_batch_size: 4
+- eval_batch_size: 4
+- seed: 42
+- distributed_type: multi-GPU
+- num_devices: 2
+- total_train_batch_size: 8
+- total_eval_batch_size: 8
+- optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
+- lr_scheduler_type: cosine
+- lr_scheduler_warmup_steps: 20
+- num_epochs: 5.0
+
+### Training results
+
+| Training Loss | Epoch | Step | Validation Loss |
+|:-------------:|:------:|:----:|:---------------:|
+| 0.3041 | 0.3676 | 100 | 0.3208 |
+| 0.2575 | 0.7353 | 200 | 0.2202 |
+| 0.2475 | 1.1029 | 300 | 0.1856 |
+| 0.1415 | 1.4706 | 400 | 0.2149 |
+| 0.1281 | 1.8382 | 500 | 0.1950 |
+| 0.173 | 2.2059 | 600 | 0.2180 |
+
+
+### Framework versions
+
+- PEFT 0.10.0
+- Transformers 4.40.0
+- Pytorch 2.2.1
+- Datasets 2.18.0
+- Tokenizers 0.19.1
\ No newline at end of file
diff --git a/llama2_13b_peft/winowhy/adapter_config.json b/llama2_13b_peft/winowhy/adapter_config.json
new file mode 100644
index 0000000000000000000000000000000000000000..cee9a588ae2800a17e2d229a92b4c336eb0d9393
--- /dev/null
+++ b/llama2_13b_peft/winowhy/adapter_config.json
@@ -0,0 +1,34 @@
+{
+ "alpha_pattern": {},
+ "auto_mapping": null,
+ "base_model_name_or_path": "/data1/model/llama2/meta-llama/Llama2-13b",
+ "bias": "none",
+ "fan_in_fan_out": false,
+ "inference_mode": true,
+ "init_lora_weights": true,
+ "layer_replication": null,
+ "layers_pattern": null,
+ "layers_to_transform": null,
+ "loftq_config": {},
+ "lora_alpha": 16,
+ "lora_dropout": 0.0,
+ "megatron_config": null,
+ "megatron_core": "megatron.core",
+ "modules_to_save": null,
+ "peft_type": "LORA",
+ "r": 8,
+ "rank_pattern": {},
+ "revision": null,
+ "target_modules": [
+ "o_proj",
+ "gate_proj",
+ "v_proj",
+ "k_proj",
+ "down_proj",
+ "up_proj",
+ "q_proj"
+ ],
+ "task_type": "CAUSAL_LM",
+ "use_dora": false,
+ "use_rslora": false
+}
\ No newline at end of file
diff --git a/llama2_13b_peft/winowhy/adapter_model.safetensors b/llama2_13b_peft/winowhy/adapter_model.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..8ae312d59c10925229913da6d80ab8bc19bba09f
--- /dev/null
+++ b/llama2_13b_peft/winowhy/adapter_model.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:18f5b712225a1b176def73f49c4421f30a1665020adde7a32de725cc79f1b3d7
+size 125248064
diff --git a/llama2_13b_peft/winowhy/all_results.json b/llama2_13b_peft/winowhy/all_results.json
new file mode 100644
index 0000000000000000000000000000000000000000..18a839613fce6cc63b65c491f4a0f4ef79a5e9c0
--- /dev/null
+++ b/llama2_13b_peft/winowhy/all_results.json
@@ -0,0 +1,12 @@
+{
+ "epoch": 2.2058823529411766,
+ "eval_loss": 0.18564413487911224,
+ "eval_runtime": 1.2133,
+ "eval_samples_per_second": 94.784,
+ "eval_steps_per_second": 12.363,
+ "total_flos": 3.440278732852429e+16,
+ "train_loss": 0.47969158987204236,
+ "train_runtime": 190.8837,
+ "train_samples_per_second": 56.972,
+ "train_steps_per_second": 7.125
+}
\ No newline at end of file
diff --git a/llama2_13b_peft/winowhy/eval_results.json b/llama2_13b_peft/winowhy/eval_results.json
new file mode 100644
index 0000000000000000000000000000000000000000..e3608cbe152875d9891f75bf0bb0dfae2aa2bb16
--- /dev/null
+++ b/llama2_13b_peft/winowhy/eval_results.json
@@ -0,0 +1,7 @@
+{
+ "epoch": 2.2058823529411766,
+ "eval_loss": 0.18564413487911224,
+ "eval_runtime": 1.2133,
+ "eval_samples_per_second": 94.784,
+ "eval_steps_per_second": 12.363
+}
\ No newline at end of file
diff --git a/llama2_13b_peft/winowhy/special_tokens_map.json b/llama2_13b_peft/winowhy/special_tokens_map.json
new file mode 100644
index 0000000000000000000000000000000000000000..72ecfeeb7e14d244c936169d2ed139eeae235ef1
--- /dev/null
+++ b/llama2_13b_peft/winowhy/special_tokens_map.json
@@ -0,0 +1,24 @@
+{
+ "bos_token": {
+ "content": "<s>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false
+ },
+ "eos_token": {
+ "content": "",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false
+ },
+ "pad_token": "",
+ "unk_token": {
+ "content": "",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false
+ }
+}
diff --git a/llama2_13b_peft/winowhy/tokenizer.model b/llama2_13b_peft/winowhy/tokenizer.model
new file mode 100644
index 0000000000000000000000000000000000000000..6c00c742ce03c627d6cd5b795984876fa49fa899
--- /dev/null
+++ b/llama2_13b_peft/winowhy/tokenizer.model
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9e556afd44213b6bd1be2b850ebbbd98f5481437a8021afaf58ee7fb1818d347
+size 499723
diff --git a/llama2_13b_peft/winowhy/tokenizer_config.json b/llama2_13b_peft/winowhy/tokenizer_config.json
new file mode 100644
index 0000000000000000000000000000000000000000..a40266f39e5b5fed14de34710d35eb9e98d6bdad
--- /dev/null
+++ b/llama2_13b_peft/winowhy/tokenizer_config.json
@@ -0,0 +1,45 @@
+{
+ "add_bos_token": true,
+ "add_eos_token": false,
+ "add_prefix_space": true,
+ "added_tokens_decoder": {
+ "0": {
+ "content": "<unk>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "1": {
+ "content": "<s>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "2": {
+ "content": "</s>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ }
+ },
+ "bos_token": "<s>",
+ "chat_template": "{% if messages[0]['role'] == 'system' %}{% set system_message = messages[0]['content'] %}{% endif %}{% if system_message is defined %}{{ system_message }}{% endif %}{% for message in messages %}{% set content = message['content'] %}{% if message['role'] == 'user' %}{{ content }}{% elif message['role'] == 'assistant' %}{{ content + '\\n' }}{% endif %}{% endfor %}",
+ "clean_up_tokenization_spaces": false,
+ "eos_token": "</s>",
+ "legacy": true,
+ "model_max_length": 1000000000000000019884624838656,
+ "pad_token": "",
+ "padding_side": "right",
+ "sp_model_kwargs": {},
+ "spaces_between_special_tokens": false,
+ "split_special_tokens": false,
+ "tokenizer_class": "LlamaTokenizer",
+ "unk_token": "<unk>",
+ "use_default_system_prompt": false
+}
diff --git a/llama2_13b_peft/winowhy/train_results.json b/llama2_13b_peft/winowhy/train_results.json
new file mode 100644
index 0000000000000000000000000000000000000000..5b9791f01f8da5a55828b26a9c74b5574719fa2b
--- /dev/null
+++ b/llama2_13b_peft/winowhy/train_results.json
@@ -0,0 +1,8 @@
+{
+ "epoch": 2.2058823529411766,
+ "total_flos": 3.440278732852429e+16,
+ "train_loss": 0.47969158987204236,
+ "train_runtime": 190.8837,
+ "train_samples_per_second": 56.972,
+ "train_steps_per_second": 7.125
+}
\ No newline at end of file
diff --git a/llama2_13b_peft/winowhy/trainer_log.jsonl b/llama2_13b_peft/winowhy/trainer_log.jsonl
new file mode 100644
index 0000000000000000000000000000000000000000..d4e6df6a4b6c5596e2b5db96842db7da870d8eaf
--- /dev/null
+++ b/llama2_13b_peft/winowhy/trainer_log.jsonl
@@ -0,0 +1,68 @@
+{"current_steps": 10, "total_steps": 1360, "loss": 9.0197, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 2.5e-05, "epoch": 0.03676470588235294, "percentage": 0.74, "elapsed_time": "0:00:04", "remaining_time": "0:09:48"}
+{"current_steps": 20, "total_steps": 1360, "loss": 6.0796, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 5e-05, "epoch": 0.07352941176470588, "percentage": 1.47, "elapsed_time": "0:00:07", "remaining_time": "0:08:03"}
+{"current_steps": 30, "total_steps": 1360, "loss": 0.7866, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.99931296277454e-05, "epoch": 0.11029411764705882, "percentage": 2.21, "elapsed_time": "0:00:10", "remaining_time": "0:07:25"}
+{"current_steps": 40, "total_steps": 1360, "loss": 0.3572, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.997252228714279e-05, "epoch": 0.14705882352941177, "percentage": 2.94, "elapsed_time": "0:00:12", "remaining_time": "0:07:05"}
+{"current_steps": 50, "total_steps": 1360, "loss": 0.3669, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.993818930460026e-05, "epoch": 0.18382352941176472, "percentage": 3.68, "elapsed_time": "0:00:15", "remaining_time": "0:06:52"}
+{"current_steps": 60, "total_steps": 1360, "loss": 0.364, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.9890149550547454e-05, "epoch": 0.22058823529411764, "percentage": 4.41, "elapsed_time": "0:00:18", "remaining_time": "0:06:41"}
+{"current_steps": 70, "total_steps": 1360, "loss": 0.3211, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.982842942906386e-05, "epoch": 0.25735294117647056, "percentage": 5.15, "elapsed_time": "0:00:21", "remaining_time": "0:06:34"}
+{"current_steps": 80, "total_steps": 1360, "loss": 0.3052, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.9753062863366276e-05, "epoch": 0.29411764705882354, "percentage": 5.88, "elapsed_time": "0:00:24", "remaining_time": "0:06:27"}
+{"current_steps": 90, "total_steps": 1360, "loss": 0.3446, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.966409127716367e-05, "epoch": 0.33088235294117646, "percentage": 6.62, "elapsed_time": "0:00:26", "remaining_time": "0:06:20"}
+{"current_steps": 100, "total_steps": 1360, "loss": 0.3041, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.95615635718894e-05, "epoch": 0.36764705882352944, "percentage": 7.35, "elapsed_time": "0:00:29", "remaining_time": "0:06:15"}
+{"current_steps": 100, "total_steps": 1360, "loss": null, "eval_loss": 0.3207797408103943, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": null, "epoch": 0.36764705882352944, "percentage": 7.35, "elapsed_time": "0:00:29", "remaining_time": "0:06:15"}
+{"current_steps": 110, "total_steps": 1360, "loss": 0.2438, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.944553609982363e-05, "epoch": 0.40441176470588236, "percentage": 8.09, "elapsed_time": "0:00:34", "remaining_time": "0:06:28"}
+{"current_steps": 120, "total_steps": 1360, "loss": 0.3688, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.931607263312032e-05, "epoch": 0.4411764705882353, "percentage": 8.82, "elapsed_time": "0:00:37", "remaining_time": "0:06:22"}
+{"current_steps": 130, "total_steps": 1360, "loss": 0.2945, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.917324432875627e-05, "epoch": 0.47794117647058826, "percentage": 9.56, "elapsed_time": "0:00:39", "remaining_time": "0:06:16"}
+{"current_steps": 140, "total_steps": 1360, "loss": 0.2764, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.9017129689421e-05, "epoch": 0.5147058823529411, "percentage": 10.29, "elapsed_time": "0:00:42", "remaining_time": "0:06:11"}
+{"current_steps": 150, "total_steps": 1360, "loss": 0.3507, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.8847814520369475e-05, "epoch": 0.5514705882352942, "percentage": 11.03, "elapsed_time": "0:00:45", "remaining_time": "0:06:06"}
+{"current_steps": 160, "total_steps": 1360, "loss": 0.2592, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.8665391882260856e-05, "epoch": 0.5882352941176471, "percentage": 11.76, "elapsed_time": "0:00:48", "remaining_time": "0:06:02"}
+{"current_steps": 170, "total_steps": 1360, "loss": 0.2624, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.846996204000967e-05, "epoch": 0.625, "percentage": 12.5, "elapsed_time": "0:00:51", "remaining_time": "0:05:57"}
+{"current_steps": 180, "total_steps": 1360, "loss": 0.373, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.8261632407677174e-05, "epoch": 0.6617647058823529, "percentage": 13.24, "elapsed_time": "0:00:53", "remaining_time": "0:05:53"}
+{"current_steps": 190, "total_steps": 1360, "loss": 0.2961, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.804051748943343e-05, "epoch": 0.6985294117647058, "percentage": 13.97, "elapsed_time": "0:00:56", "remaining_time": "0:05:49"}
+{"current_steps": 200, "total_steps": 1360, "loss": 0.2575, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.780673881662242e-05, "epoch": 0.7352941176470589, "percentage": 14.71, "elapsed_time": "0:00:59", "remaining_time": "0:05:45"}
+{"current_steps": 200, "total_steps": 1360, "loss": null, "eval_loss": 0.22019265592098236, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": null, "epoch": 0.7352941176470589, "percentage": 14.71, "elapsed_time": "0:00:59", "remaining_time": "0:05:45"}
+{"current_steps": 210, "total_steps": 1360, "loss": 0.2374, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.756042488096471e-05, "epoch": 0.7720588235294118, "percentage": 15.44, "elapsed_time": "0:01:03", "remaining_time": "0:05:50"}
+{"current_steps": 220, "total_steps": 1360, "loss": 0.2592, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.730171106393466e-05, "epoch": 0.8088235294117647, "percentage": 16.18, "elapsed_time": "0:01:06", "remaining_time": "0:05:45"}
+{"current_steps": 230, "total_steps": 1360, "loss": 0.2067, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.7030739562350713e-05, "epoch": 0.8455882352941176, "percentage": 16.91, "elapsed_time": "0:01:09", "remaining_time": "0:05:41"}
+{"current_steps": 240, "total_steps": 1360, "loss": 0.2181, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.674765931021976e-05, "epoch": 0.8823529411764706, "percentage": 17.65, "elapsed_time": "0:01:12", "remaining_time": "0:05:37"}
+{"current_steps": 250, "total_steps": 1360, "loss": 0.2213, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.645262589687861e-05, "epoch": 0.9191176470588235, "percentage": 18.38, "elapsed_time": "0:01:15", "remaining_time": "0:05:33"}
+{"current_steps": 260, "total_steps": 1360, "loss": 0.2569, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.614580148147744e-05, "epoch": 0.9558823529411765, "percentage": 19.12, "elapsed_time": "0:01:17", "remaining_time": "0:05:29"}
+{"current_steps": 270, "total_steps": 1360, "loss": 0.1528, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.582735470385229e-05, "epoch": 0.9926470588235294, "percentage": 19.85, "elapsed_time": "0:01:20", "remaining_time": "0:05:26"}
+{"current_steps": 280, "total_steps": 1360, "loss": 0.263, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.5497460591835615e-05, "epoch": 1.0294117647058822, "percentage": 20.59, "elapsed_time": "0:01:23", "remaining_time": "0:05:22"}
+{"current_steps": 290, "total_steps": 1360, "loss": 0.1773, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.515630046505575e-05, "epoch": 1.0661764705882353, "percentage": 21.32, "elapsed_time": "0:01:26", "remaining_time": "0:05:19"}
+{"current_steps": 300, "total_steps": 1360, "loss": 0.2475, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.480406183527823e-05, "epoch": 1.1029411764705883, "percentage": 22.06, "elapsed_time": "0:01:29", "remaining_time": "0:05:15"}
+{"current_steps": 300, "total_steps": 1360, "loss": null, "eval_loss": 0.18564413487911224, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": null, "epoch": 1.1029411764705883, "percentage": 22.06, "elapsed_time": "0:01:29", "remaining_time": "0:05:15"}
+{"current_steps": 310, "total_steps": 1360, "loss": 0.1195, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.4440938303343804e-05, "epoch": 1.1397058823529411, "percentage": 22.79, "elapsed_time": "0:01:33", "remaining_time": "0:05:17"}
+{"current_steps": 320, "total_steps": 1360, "loss": 0.2049, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.406712945275955e-05, "epoch": 1.1764705882352942, "percentage": 23.53, "elapsed_time": "0:01:36", "remaining_time": "0:05:13"}
+{"current_steps": 330, "total_steps": 1360, "loss": 0.159, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.368284074000193e-05, "epoch": 1.213235294117647, "percentage": 24.26, "elapsed_time": "0:01:39", "remaining_time": "0:05:10"}
+{"current_steps": 340, "total_steps": 1360, "loss": 0.2263, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.328828338159173e-05, "epoch": 1.25, "percentage": 25.0, "elapsed_time": "0:01:42", "remaining_time": "0:05:06"}
+{"current_steps": 350, "total_steps": 1360, "loss": 0.2505, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.288367423800319e-05, "epoch": 1.2867647058823528, "percentage": 25.74, "elapsed_time": "0:01:44", "remaining_time": "0:05:02"}
+{"current_steps": 360, "total_steps": 1360, "loss": 0.1879, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.2469235694471043e-05, "epoch": 1.3235294117647058, "percentage": 26.47, "elapsed_time": "0:01:47", "remaining_time": "0:04:59"}
+{"current_steps": 370, "total_steps": 1360, "loss": 0.1437, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.204519553876095e-05, "epoch": 1.3602941176470589, "percentage": 27.21, "elapsed_time": "0:01:50", "remaining_time": "0:04:55"}
+{"current_steps": 380, "total_steps": 1360, "loss": 0.18, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.161178683597054e-05, "epoch": 1.3970588235294117, "percentage": 27.94, "elapsed_time": "0:01:53", "remaining_time": "0:04:52"}
+{"current_steps": 390, "total_steps": 1360, "loss": 0.2727, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.116924780042997e-05, "epoch": 1.4338235294117647, "percentage": 28.68, "elapsed_time": "0:01:56", "remaining_time": "0:04:48"}
+{"current_steps": 400, "total_steps": 1360, "loss": 0.1415, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.071782166477213e-05, "epoch": 1.4705882352941178, "percentage": 29.41, "elapsed_time": "0:01:58", "remaining_time": "0:04:45"}
+{"current_steps": 400, "total_steps": 1360, "loss": null, "eval_loss": 0.2149094045162201, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": null, "epoch": 1.4705882352941178, "percentage": 29.41, "elapsed_time": "0:01:58", "remaining_time": "0:04:45"}
+{"current_steps": 410, "total_steps": 1360, "loss": 0.1996, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.0257756546244804e-05, "epoch": 1.5073529411764706, "percentage": 30.15, "elapsed_time": "0:02:03", "remaining_time": "0:04:45"}
+{"current_steps": 420, "total_steps": 1360, "loss": 0.2265, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 3.978930531033807e-05, "epoch": 1.5441176470588234, "percentage": 30.88, "elapsed_time": "0:02:06", "remaining_time": "0:04:42"}
+{"current_steps": 430, "total_steps": 1360, "loss": 0.1411, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 3.93127254318018e-05, "epoch": 1.5808823529411766, "percentage": 31.62, "elapsed_time": "0:02:08", "remaining_time": "0:04:38"}
+{"current_steps": 440, "total_steps": 1360, "loss": 0.2201, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 3.882827885312999e-05, "epoch": 1.6176470588235294, "percentage": 32.35, "elapsed_time": "0:02:11", "remaining_time": "0:04:35"}
+{"current_steps": 450, "total_steps": 1360, "loss": 0.1829, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 3.833623184058926e-05, "epoch": 1.6544117647058822, "percentage": 33.09, "elapsed_time": "0:02:14", "remaining_time": "0:04:32"}
+{"current_steps": 460, "total_steps": 1360, "loss": 0.1413, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 3.783685483787105e-05, "epoch": 1.6911764705882353, "percentage": 33.82, "elapsed_time": "0:02:17", "remaining_time": "0:04:28"}
+{"current_steps": 470, "total_steps": 1360, "loss": 0.1551, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 3.7330422317447685e-05, "epoch": 1.7279411764705883, "percentage": 34.56, "elapsed_time": "0:02:20", "remaining_time": "0:04:25"}
+{"current_steps": 480, "total_steps": 1360, "loss": 0.2619, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 3.681721262971413e-05, "epoch": 1.7647058823529411, "percentage": 35.29, "elapsed_time": "0:02:23", "remaining_time": "0:04:22"}
+{"current_steps": 490, "total_steps": 1360, "loss": 0.237, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 3.6297507849998344e-05, "epoch": 1.8014705882352942, "percentage": 36.03, "elapsed_time": "0:02:25", "remaining_time": "0:04:19"}
+{"current_steps": 500, "total_steps": 1360, "loss": 0.1281, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 3.5771593623524265e-05, "epoch": 1.8382352941176472, "percentage": 36.76, "elapsed_time": "0:02:28", "remaining_time": "0:04:15"}
+{"current_steps": 500, "total_steps": 1360, "loss": null, "eval_loss": 0.19497708976268768, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": null, "epoch": 1.8382352941176472, "percentage": 36.76, "elapsed_time": "0:02:28", "remaining_time": "0:04:15"}
+{"current_steps": 510, "total_steps": 1360, "loss": 0.1565, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 3.5239759008412666e-05, "epoch": 1.875, "percentage": 37.5, "elapsed_time": "0:02:33", "remaining_time": "0:04:15"}
+{"current_steps": 520, "total_steps": 1360, "loss": 0.1924, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 3.4702296316806244e-05, "epoch": 1.9117647058823528, "percentage": 38.24, "elapsed_time": "0:02:35", "remaining_time": "0:04:11"}
+{"current_steps": 530, "total_steps": 1360, "loss": 0.1904, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 3.415950095420616e-05, "epoch": 1.9485294117647058, "percentage": 38.97, "elapsed_time": "0:02:38", "remaining_time": "0:04:08"}
+{"current_steps": 540, "total_steps": 1360, "loss": 0.1523, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 3.361167125710832e-05, "epoch": 1.9852941176470589, "percentage": 39.71, "elapsed_time": "0:02:41", "remaining_time": "0:04:05"}
+{"current_steps": 550, "total_steps": 1360, "loss": 0.1413, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 3.305910832902884e-05, "epoch": 2.0220588235294117, "percentage": 40.44, "elapsed_time": "0:02:44", "remaining_time": "0:04:01"}
+{"current_steps": 560, "total_steps": 1360, "loss": 0.1112, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 3.2502115875008524e-05, "epoch": 2.0588235294117645, "percentage": 41.18, "elapsed_time": "0:02:47", "remaining_time": "0:03:58"}
+{"current_steps": 570, "total_steps": 1360, "loss": 0.0935, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 3.1941000034687515e-05, "epoch": 2.0955882352941178, "percentage": 41.91, "elapsed_time": "0:02:50", "remaining_time": "0:03:55"}
+{"current_steps": 580, "total_steps": 1360, "loss": 0.1512, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 3.1376069214041913e-05, "epoch": 2.1323529411764706, "percentage": 42.65, "elapsed_time": "0:02:52", "remaining_time": "0:03:52"}
+{"current_steps": 590, "total_steps": 1360, "loss": 0.1686, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 3.0807633915874584e-05, "epoch": 2.1691176470588234, "percentage": 43.38, "elapsed_time": "0:02:55", "remaining_time": "0:03:49"}
+{"current_steps": 600, "total_steps": 1360, "loss": 0.173, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 3.0236006569153617e-05, "epoch": 2.2058823529411766, "percentage": 44.12, "elapsed_time": "0:02:58", "remaining_time": "0:03:46"}
+{"current_steps": 600, "total_steps": 1360, "loss": null, "eval_loss": 0.2179846614599228, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": null, "epoch": 2.2058823529411766, "percentage": 44.12, "elapsed_time": "0:02:58", "remaining_time": "0:03:46"}
+{"current_steps": 600, "total_steps": 1360, "loss": null, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": null, "epoch": 2.2058823529411766, "percentage": 44.12, "elapsed_time": "0:02:58", "remaining_time": "0:03:46"}
+{"current_steps": 15, "total_steps": 15, "loss": null, "eval_loss": 0.18564413487911224, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": null, "epoch": 2.2058823529411766, "percentage": 100.0, "elapsed_time": "0:03:09", "remaining_time": "0:00:00"}
diff --git a/llama2_13b_peft/winowhy/trainer_state.json b/llama2_13b_peft/winowhy/trainer_state.json
new file mode 100644
index 0000000000000000000000000000000000000000..391edc4290881858610b268e64840ebd258bee6d
--- /dev/null
+++ b/llama2_13b_peft/winowhy/trainer_state.json
@@ -0,0 +1,498 @@
+{
+ "best_metric": 0.18564413487911224,
+ "best_model_checkpoint": "ckpt/llama2_13b_fuze27_no_sys/winowhy_no_sys/checkpoint-300",
+ "epoch": 2.2058823529411766,
+ "eval_steps": 100,
+ "global_step": 600,
+ "is_hyper_param_search": false,
+ "is_local_process_zero": true,
+ "is_world_process_zero": true,
+ "log_history": [
+ {
+ "epoch": 0.03676470588235294,
+ "grad_norm": 19.647098541259766,
+ "learning_rate": 2.5e-05,
+ "loss": 9.0197,
+ "step": 10
+ },
+ {
+ "epoch": 0.07352941176470588,
+ "grad_norm": 8.87275218963623,
+ "learning_rate": 5e-05,
+ "loss": 6.0796,
+ "step": 20
+ },
+ {
+ "epoch": 0.11029411764705882,
+ "grad_norm": 3.1659748554229736,
+ "learning_rate": 4.99931296277454e-05,
+ "loss": 0.7866,
+ "step": 30
+ },
+ {
+ "epoch": 0.14705882352941177,
+ "grad_norm": 2.318621873855591,
+ "learning_rate": 4.997252228714279e-05,
+ "loss": 0.3572,
+ "step": 40
+ },
+ {
+ "epoch": 0.18382352941176472,
+ "grad_norm": 4.025834560394287,
+ "learning_rate": 4.993818930460026e-05,
+ "loss": 0.3669,
+ "step": 50
+ },
+ {
+ "epoch": 0.22058823529411764,
+ "grad_norm": 6.20689058303833,
+ "learning_rate": 4.9890149550547454e-05,
+ "loss": 0.364,
+ "step": 60
+ },
+ {
+ "epoch": 0.25735294117647056,
+ "grad_norm": 1.2856365442276,
+ "learning_rate": 4.982842942906386e-05,
+ "loss": 0.3211,
+ "step": 70
+ },
+ {
+ "epoch": 0.29411764705882354,
+ "grad_norm": 2.2388663291931152,
+ "learning_rate": 4.9753062863366276e-05,
+ "loss": 0.3052,
+ "step": 80
+ },
+ {
+ "epoch": 0.33088235294117646,
+ "grad_norm": 4.5252156257629395,
+ "learning_rate": 4.966409127716367e-05,
+ "loss": 0.3446,
+ "step": 90
+ },
+ {
+ "epoch": 0.36764705882352944,
+ "grad_norm": 1.3918992280960083,
+ "learning_rate": 4.95615635718894e-05,
+ "loss": 0.3041,
+ "step": 100
+ },
+ {
+ "epoch": 0.36764705882352944,
+ "eval_loss": 0.3207797408103943,
+ "eval_runtime": 1.2144,
+ "eval_samples_per_second": 94.698,
+ "eval_steps_per_second": 12.352,
+ "step": 100
+ },
+ {
+ "epoch": 0.40441176470588236,
+ "grad_norm": 1.2850301265716553,
+ "learning_rate": 4.944553609982363e-05,
+ "loss": 0.2438,
+ "step": 110
+ },
+ {
+ "epoch": 0.4411764705882353,
+ "grad_norm": 2.6484286785125732,
+ "learning_rate": 4.931607263312032e-05,
+ "loss": 0.3688,
+ "step": 120
+ },
+ {
+ "epoch": 0.47794117647058826,
+ "grad_norm": 1.9035048484802246,
+ "learning_rate": 4.917324432875627e-05,
+ "loss": 0.2945,
+ "step": 130
+ },
+ {
+ "epoch": 0.5147058823529411,
+ "grad_norm": 1.4788445234298706,
+ "learning_rate": 4.9017129689421e-05,
+ "loss": 0.2764,
+ "step": 140
+ },
+ {
+ "epoch": 0.5514705882352942,
+ "grad_norm": 3.2268259525299072,
+ "learning_rate": 4.8847814520369475e-05,
+ "loss": 0.3507,
+ "step": 150
+ },
+ {
+ "epoch": 0.5882352941176471,
+ "grad_norm": 4.3639116287231445,
+ "learning_rate": 4.8665391882260856e-05,
+ "loss": 0.2592,
+ "step": 160
+ },
+ {
+ "epoch": 0.625,
+ "grad_norm": 1.9433594942092896,
+ "learning_rate": 4.846996204000967e-05,
+ "loss": 0.2624,
+ "step": 170
+ },
+ {
+ "epoch": 0.6617647058823529,
+ "grad_norm": 4.276350498199463,
+ "learning_rate": 4.8261632407677174e-05,
+ "loss": 0.373,
+ "step": 180
+ },
+ {
+ "epoch": 0.6985294117647058,
+ "grad_norm": 5.467511177062988,
+ "learning_rate": 4.804051748943343e-05,
+ "loss": 0.2961,
+ "step": 190
+ },
+ {
+ "epoch": 0.7352941176470589,
+ "grad_norm": 1.3400689363479614,
+ "learning_rate": 4.780673881662242e-05,
+ "loss": 0.2575,
+ "step": 200
+ },
+ {
+ "epoch": 0.7352941176470589,
+ "eval_loss": 0.22019265592098236,
+ "eval_runtime": 1.2151,
+ "eval_samples_per_second": 94.641,
+ "eval_steps_per_second": 12.345,
+ "step": 200
+ },
+ {
+ "epoch": 0.7720588235294118,
+ "grad_norm": 1.3730210065841675,
+ "learning_rate": 4.756042488096471e-05,
+ "loss": 0.2374,
+ "step": 210
+ },
+ {
+ "epoch": 0.8088235294117647,
+ "grad_norm": 1.551169753074646,
+ "learning_rate": 4.730171106393466e-05,
+ "loss": 0.2592,
+ "step": 220
+ },
+ {
+ "epoch": 0.8455882352941176,
+ "grad_norm": 1.8357897996902466,
+ "learning_rate": 4.7030739562350713e-05,
+ "loss": 0.2067,
+ "step": 230
+ },
+ {
+ "epoch": 0.8823529411764706,
+ "grad_norm": 1.9791502952575684,
+ "learning_rate": 4.674765931021976e-05,
+ "loss": 0.2181,
+ "step": 240
+ },
+ {
+ "epoch": 0.9191176470588235,
+ "grad_norm": 3.2839415073394775,
+ "learning_rate": 4.645262589687861e-05,
+ "loss": 0.2213,
+ "step": 250
+ },
+ {
+ "epoch": 0.9558823529411765,
+ "grad_norm": 6.267187118530273,
+ "learning_rate": 4.614580148147744e-05,
+ "loss": 0.2569,
+ "step": 260
+ },
+ {
+ "epoch": 0.9926470588235294,
+ "grad_norm": 0.45617279410362244,
+ "learning_rate": 4.582735470385229e-05,
+ "loss": 0.1528,
+ "step": 270
+ },
+ {
+ "epoch": 1.0294117647058822,
+ "grad_norm": 2.27298641204834,
+ "learning_rate": 4.5497460591835615e-05,
+ "loss": 0.263,
+ "step": 280
+ },
+ {
+ "epoch": 1.0661764705882353,
+ "grad_norm": 0.889145016670227,
+ "learning_rate": 4.515630046505575e-05,
+ "loss": 0.1773,
+ "step": 290
+ },
+ {
+ "epoch": 1.1029411764705883,
+ "grad_norm": 1.5614073276519775,
+ "learning_rate": 4.480406183527823e-05,
+ "loss": 0.2475,
+ "step": 300
+ },
+ {
+ "epoch": 1.1029411764705883,
+ "eval_loss": 0.18564413487911224,
+ "eval_runtime": 1.2151,
+ "eval_samples_per_second": 94.643,
+ "eval_steps_per_second": 12.345,
+ "step": 300
+ },
+ {
+ "epoch": 1.1397058823529411,
+ "grad_norm": 1.2656581401824951,
+ "learning_rate": 4.4440938303343804e-05,
+ "loss": 0.1195,
+ "step": 310
+ },
+ {
+ "epoch": 1.1764705882352942,
+ "grad_norm": 12.3343505859375,
+ "learning_rate": 4.406712945275955e-05,
+ "loss": 0.2049,
+ "step": 320
+ },
+ {
+ "epoch": 1.213235294117647,
+ "grad_norm": 3.2121803760528564,
+ "learning_rate": 4.368284074000193e-05,
+ "loss": 0.159,
+ "step": 330
+ },
+ {
+ "epoch": 1.25,
+ "grad_norm": 2.942842960357666,
+ "learning_rate": 4.328828338159173e-05,
+ "loss": 0.2263,
+ "step": 340
+ },
+ {
+ "epoch": 1.2867647058823528,
+ "grad_norm": 2.7704460620880127,
+ "learning_rate": 4.288367423800319e-05,
+ "loss": 0.2505,
+ "step": 350
+ },
+ {
+ "epoch": 1.3235294117647058,
+ "grad_norm": 2.340487241744995,
+ "learning_rate": 4.2469235694471043e-05,
+ "loss": 0.1879,
+ "step": 360
+ },
+ {
+ "epoch": 1.3602941176470589,
+ "grad_norm": 1.4567615985870361,
+ "learning_rate": 4.204519553876095e-05,
+ "loss": 0.1437,
+ "step": 370
+ },
+ {
+ "epoch": 1.3970588235294117,
+ "grad_norm": 1.2511286735534668,
+ "learning_rate": 4.161178683597054e-05,
+ "loss": 0.18,
+ "step": 380
+ },
+ {
+ "epoch": 1.4338235294117647,
+ "grad_norm": 1.8652863502502441,
+ "learning_rate": 4.116924780042997e-05,
+ "loss": 0.2727,
+ "step": 390
+ },
+ {
+ "epoch": 1.4705882352941178,
+ "grad_norm": 1.7881311178207397,
+ "learning_rate": 4.071782166477213e-05,
+ "loss": 0.1415,
+ "step": 400
+ },
+ {
+ "epoch": 1.4705882352941178,
+ "eval_loss": 0.2149094045162201,
+ "eval_runtime": 1.2151,
+ "eval_samples_per_second": 94.639,
+ "eval_steps_per_second": 12.344,
+ "step": 400
+ },
+ {
+ "epoch": 1.5073529411764706,
+ "grad_norm": 0.5304898023605347,
+ "learning_rate": 4.0257756546244804e-05,
+ "loss": 0.1996,
+ "step": 410
+ },
+ {
+ "epoch": 1.5441176470588234,
+ "grad_norm": 3.2032785415649414,
+ "learning_rate": 3.978930531033807e-05,
+ "loss": 0.2265,
+ "step": 420
+ },
+ {
+ "epoch": 1.5808823529411766,
+ "grad_norm": 0.8113433718681335,
+ "learning_rate": 3.93127254318018e-05,
+ "loss": 0.1411,
+ "step": 430
+ },
+ {
+ "epoch": 1.6176470588235294,
+ "grad_norm": 0.9410021901130676,
+ "learning_rate": 3.882827885312999e-05,
+ "loss": 0.2201,
+ "step": 440
+ },
+ {
+ "epoch": 1.6544117647058822,
+ "grad_norm": 3.979304790496826,
+ "learning_rate": 3.833623184058926e-05,
+ "loss": 0.1829,
+ "step": 450
+ },
+ {
+ "epoch": 1.6911764705882353,
+ "grad_norm": 4.951131343841553,
+ "learning_rate": 3.783685483787105e-05,
+ "loss": 0.1413,
+ "step": 460
+ },
+ {
+ "epoch": 1.7279411764705883,
+ "grad_norm": 4.674718379974365,
+ "learning_rate": 3.7330422317447685e-05,
+ "loss": 0.1551,
+ "step": 470
+ },
+ {
+ "epoch": 1.7647058823529411,
+ "grad_norm": 2.091346025466919,
+ "learning_rate": 3.681721262971413e-05,
+ "loss": 0.2619,
+ "step": 480
+ },
+ {
+ "epoch": 1.8014705882352942,
+ "grad_norm": 2.1362998485565186,
+ "learning_rate": 3.6297507849998344e-05,
+ "loss": 0.237,
+ "step": 490
+ },
+ {
+ "epoch": 1.8382352941176472,
+ "grad_norm": 0.47038739919662476,
+ "learning_rate": 3.5771593623524265e-05,
+ "loss": 0.1281,
+ "step": 500
+ },
+ {
+ "epoch": 1.8382352941176472,
+ "eval_loss": 0.19497708976268768,
+ "eval_runtime": 1.2149,
+ "eval_samples_per_second": 94.658,
+ "eval_steps_per_second": 12.347,
+ "step": 500
+ },
+ {
+ "epoch": 1.875,
+ "grad_norm": 1.3052918910980225,
+ "learning_rate": 3.5239759008412666e-05,
+ "loss": 0.1565,
+ "step": 510
+ },
+ {
+ "epoch": 1.9117647058823528,
+ "grad_norm": 2.9952781200408936,
+ "learning_rate": 3.4702296316806244e-05,
+ "loss": 0.1924,
+ "step": 520
+ },
+ {
+ "epoch": 1.9485294117647058,
+ "grad_norm": 0.8276472091674805,
+ "learning_rate": 3.415950095420616e-05,
+ "loss": 0.1904,
+ "step": 530
+ },
+ {
+ "epoch": 1.9852941176470589,
+ "grad_norm": 2.209911823272705,
+ "learning_rate": 3.361167125710832e-05,
+ "loss": 0.1523,
+ "step": 540
+ },
+ {
+ "epoch": 2.0220588235294117,
+ "grad_norm": 1.9291682243347168,
+ "learning_rate": 3.305910832902884e-05,
+ "loss": 0.1413,
+ "step": 550
+ },
+ {
+ "epoch": 2.0588235294117645,
+ "grad_norm": 0.1702612340450287,
+ "learning_rate": 3.2502115875008524e-05,
+ "loss": 0.1112,
+ "step": 560
+ },
+ {
+ "epoch": 2.0955882352941178,
+ "grad_norm": 1.7829452753067017,
+ "learning_rate": 3.1941000034687515e-05,
+ "loss": 0.0935,
+ "step": 570
+ },
+ {
+ "epoch": 2.1323529411764706,
+ "grad_norm": 1.0806934833526611,
+ "learning_rate": 3.1376069214041913e-05,
+ "loss": 0.1512,
+ "step": 580
+ },
+ {
+ "epoch": 2.1691176470588234,
+ "grad_norm": 0.183503657579422,
+ "learning_rate": 3.0807633915874584e-05,
+ "loss": 0.1686,
+ "step": 590
+ },
+ {
+ "epoch": 2.2058823529411766,
+ "grad_norm": 3.425459146499634,
+ "learning_rate": 3.0236006569153617e-05,
+ "loss": 0.173,
+ "step": 600
+ },
+ {
+ "epoch": 2.2058823529411766,
+ "eval_loss": 0.2179846614599228,
+ "eval_runtime": 1.2136,
+ "eval_samples_per_second": 94.756,
+ "eval_steps_per_second": 12.359,
+ "step": 600
+ },
+ {
+ "epoch": 2.2058823529411766,
+ "step": 600,
+ "total_flos": 3.440278732852429e+16,
+ "train_loss": 0.47969158987204236,
+ "train_runtime": 190.8837,
+ "train_samples_per_second": 56.972,
+ "train_steps_per_second": 7.125
+ }
+ ],
+ "logging_steps": 10,
+ "max_steps": 1360,
+ "num_input_tokens_seen": 0,
+ "num_train_epochs": 5,
+ "save_steps": 100,
+ "total_flos": 3.440278732852429e+16,
+ "train_batch_size": 4,
+ "trial_name": null,
+ "trial_params": null
+}
diff --git a/llama2_13b_peft/winowhy/training_args.bin b/llama2_13b_peft/winowhy/training_args.bin
new file mode 100644
index 0000000000000000000000000000000000000000..e0055507d12524612b0f7a35112c2feb696cebfb
--- /dev/null
+++ b/llama2_13b_peft/winowhy/training_args.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:75e78f9ac454afc1cb35d5cbbebb5101ebec0583fcd21eea8b49467fe7e3fcc7
+size 5176
diff --git a/llama2_13b_peft/winowhy/training_eval_loss.png b/llama2_13b_peft/winowhy/training_eval_loss.png
new file mode 100644
index 0000000000000000000000000000000000000000..c1d2515250ae98a659503bd89fb823d7e6c1a8a9
Binary files /dev/null and b/llama2_13b_peft/winowhy/training_eval_loss.png differ
diff --git a/llama2_13b_peft/winowhy/training_loss.png b/llama2_13b_peft/winowhy/training_loss.png
new file mode 100644
index 0000000000000000000000000000000000000000..155177bbb5ad253adb93c159a1788b30278afc70
Binary files /dev/null and b/llama2_13b_peft/winowhy/training_loss.png differ