diff --git a/.gitattributes b/.gitattributes index 7c0d7b355d6a474f4d6d859bf918374e5ded169a..47edb507ecea17a9e97c66ded9d1f86c6d0df2a1 100644 --- a/.gitattributes +++ b/.gitattributes @@ -34,3 +34,12 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text *.zst filter=lfs diff=lfs merge=lfs -text *tfevents* filter=lfs diff=lfs merge=lfs -text model_judgment/gpt-4_single.jsonl filter=lfs diff=lfs merge=lfs -text +checkpoint-128/tokenizer.json filter=lfs diff=lfs merge=lfs -text +checkpoint-192/tokenizer.json filter=lfs diff=lfs merge=lfs -text +checkpoint-256/tokenizer.json filter=lfs diff=lfs merge=lfs -text +checkpoint-320/tokenizer.json filter=lfs diff=lfs merge=lfs -text +checkpoint-384/tokenizer.json filter=lfs diff=lfs merge=lfs -text +checkpoint-448/tokenizer.json filter=lfs diff=lfs merge=lfs -text +checkpoint-472/tokenizer.json filter=lfs diff=lfs merge=lfs -text +checkpoint-64/tokenizer.json filter=lfs diff=lfs merge=lfs -text +tokenizer.json filter=lfs diff=lfs merge=lfs -text diff --git a/all_results.json b/all_results.json new file mode 100644 index 0000000000000000000000000000000000000000..3f7df1775b367c58650fcb177ed8c70f041f2a06 --- /dev/null +++ b/all_results.json @@ -0,0 +1,22 @@ +{ + "epoch": 0.9979517674264948, + "eval_logits/chosen": -0.6275817155838013, + "eval_logits/rejected": -0.6124553680419922, + "eval_logps/chosen": -1.7196346521377563, + "eval_logps/rejected": -2.078376293182373, + "eval_loss": 1.193624496459961, + "eval_rewards/accuracies": 0.6531440019607544, + "eval_rewards/chosen": -3.4392693042755127, + "eval_rewards/margins": 0.7174834609031677, + "eval_rewards/rejected": -4.156752586364746, + "eval_runtime": 401.2915, + "eval_samples": 1972, + "eval_samples_per_second": 4.914, + "eval_steps_per_second": 1.229, + "total_flos": 0.0, + "train_loss": 1.280224425307775, + "train_runtime": 38087.5267, + "train_samples": 60539, + "train_samples_per_second": 1.589, + "train_steps_per_second": 0.012 +} \ No newline at end of file diff --git a/checkpoint-128/config.json b/checkpoint-128/config.json new file mode 100644 index 0000000000000000000000000000000000000000..ead276a9e0bbd4a0baae360715d8e80853974eb6 --- /dev/null +++ b/checkpoint-128/config.json @@ -0,0 +1,40 @@ +{ + "_name_or_path": "/home/v-liuzichuan/huggingface/Llama-3.2-3B-Instruct", + "architectures": [ + "LlamaForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "bos_token_id": 128000, + "eos_token_id": [ + 128001, + 128008, + 128009 + ], + "head_dim": 128, + "hidden_act": "silu", + "hidden_size": 3072, + "initializer_range": 0.02, + "intermediate_size": 8192, + "max_position_embeddings": 131072, + "mlp_bias": false, + "model_type": "llama", + "num_attention_heads": 24, + "num_hidden_layers": 28, + "num_key_value_heads": 8, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": { + "factor": 32.0, + "high_freq_factor": 4.0, + "low_freq_factor": 1.0, + "original_max_position_embeddings": 8192, + "rope_type": "llama3" + }, + "rope_theta": 500000.0, + "tie_word_embeddings": true, + "torch_dtype": "bfloat16", + "transformers_version": "4.45.2", + "use_cache": false, + "vocab_size": 128256 +} diff --git a/checkpoint-128/generation_config.json b/checkpoint-128/generation_config.json new file mode 100644 index 0000000000000000000000000000000000000000..b18d16b4cc3c799dd78e854fd824ddf8a2996285 --- /dev/null +++ b/checkpoint-128/generation_config.json @@ -0,0 +1,12 @@ +{ + "bos_token_id": 128000, + "do_sample": true, + "eos_token_id": [ + 128001, + 128008, + 128009 + ], + "temperature": 0.6, + "top_p": 0.9, + "transformers_version": "4.45.2" +} diff --git a/checkpoint-128/model-00001-of-00002.safetensors b/checkpoint-128/model-00001-of-00002.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..2eef4e22e19daaee71700fc382e8fd95129a52ca --- /dev/null +++ b/checkpoint-128/model-00001-of-00002.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2b3949429d169c7668c83fa584c88284eef7d64033554e5b8cdd72a1aa0e652e +size 4965799096 diff --git a/checkpoint-128/model-00002-of-00002.safetensors b/checkpoint-128/model-00002-of-00002.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..d50f063651b6d6d4f9a7fdedf7b83b784c9ef387 --- /dev/null +++ b/checkpoint-128/model-00002-of-00002.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f7a29dc0a6c3cb1ea1940f7552b4601e7e8f548326fe04f4251c417b520f2813 +size 2247734992 diff --git a/checkpoint-128/model.safetensors.index.json b/checkpoint-128/model.safetensors.index.json new file mode 100644 index 0000000000000000000000000000000000000000..ed64de846d720b9a7859dc20575fea8e8ca51940 --- /dev/null +++ b/checkpoint-128/model.safetensors.index.json @@ -0,0 +1,262 @@ +{ + "metadata": { + "total_size": 7213504512 + }, + "weight_map": { + "lm_head.weight": "model-00002-of-00002.safetensors", + "model.embed_tokens.weight": "model-00001-of-00002.safetensors", + "model.layers.0.input_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.0.mlp.down_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.0.mlp.gate_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.0.mlp.up_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.0.post_attention_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.0.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.0.self_attn.o_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.0.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.0.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.1.input_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.1.mlp.down_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.1.mlp.gate_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.1.mlp.up_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.1.post_attention_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.1.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.1.self_attn.o_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.1.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.1.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.10.input_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.10.mlp.down_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.10.mlp.gate_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.10.mlp.up_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.10.post_attention_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.10.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.10.self_attn.o_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.10.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.10.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.11.input_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.11.mlp.down_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.11.mlp.gate_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.11.mlp.up_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.11.post_attention_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.11.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.11.self_attn.o_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.11.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.11.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.12.input_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.12.mlp.down_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.12.mlp.gate_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.12.mlp.up_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.12.post_attention_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.12.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.12.self_attn.o_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.12.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.12.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.13.input_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.13.mlp.down_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.13.mlp.gate_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.13.mlp.up_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.13.post_attention_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.13.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.13.self_attn.o_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.13.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.13.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.14.input_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.14.mlp.down_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.14.mlp.gate_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.14.mlp.up_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.14.post_attention_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.14.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.14.self_attn.o_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.14.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.14.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.15.input_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.15.mlp.down_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.15.mlp.gate_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.15.mlp.up_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.15.post_attention_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.15.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.15.self_attn.o_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.15.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.15.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.16.input_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.16.mlp.down_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.16.mlp.gate_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.16.mlp.up_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.16.post_attention_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.16.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.16.self_attn.o_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.16.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.16.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.17.input_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.17.mlp.down_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.17.mlp.gate_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.17.mlp.up_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.17.post_attention_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.17.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.17.self_attn.o_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.17.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.17.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.18.input_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.18.mlp.down_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.18.mlp.gate_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.18.mlp.up_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.18.post_attention_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.18.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.18.self_attn.o_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.18.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.18.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.19.input_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.19.mlp.down_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.19.mlp.gate_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.19.mlp.up_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.19.post_attention_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.19.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.19.self_attn.o_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.19.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.19.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.2.input_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.2.mlp.down_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.2.mlp.gate_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.2.mlp.up_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.2.post_attention_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.2.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.2.self_attn.o_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.2.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.2.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.20.input_layernorm.weight": "model-00002-of-00002.safetensors", + "model.layers.20.mlp.down_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.20.mlp.gate_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.20.mlp.up_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.20.post_attention_layernorm.weight": "model-00002-of-00002.safetensors", + "model.layers.20.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.20.self_attn.o_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.20.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.20.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.21.input_layernorm.weight": "model-00002-of-00002.safetensors", + "model.layers.21.mlp.down_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.21.mlp.gate_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.21.mlp.up_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.21.post_attention_layernorm.weight": "model-00002-of-00002.safetensors", + "model.layers.21.self_attn.k_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.21.self_attn.o_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.21.self_attn.q_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.21.self_attn.v_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.22.input_layernorm.weight": "model-00002-of-00002.safetensors", + "model.layers.22.mlp.down_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.22.mlp.gate_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.22.mlp.up_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.22.post_attention_layernorm.weight": "model-00002-of-00002.safetensors", + "model.layers.22.self_attn.k_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.22.self_attn.o_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.22.self_attn.q_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.22.self_attn.v_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.23.input_layernorm.weight": "model-00002-of-00002.safetensors", + "model.layers.23.mlp.down_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.23.mlp.gate_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.23.mlp.up_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.23.post_attention_layernorm.weight": "model-00002-of-00002.safetensors", + "model.layers.23.self_attn.k_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.23.self_attn.o_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.23.self_attn.q_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.23.self_attn.v_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.24.input_layernorm.weight": "model-00002-of-00002.safetensors", + "model.layers.24.mlp.down_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.24.mlp.gate_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.24.mlp.up_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.24.post_attention_layernorm.weight": "model-00002-of-00002.safetensors", + "model.layers.24.self_attn.k_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.24.self_attn.o_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.24.self_attn.q_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.24.self_attn.v_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.25.input_layernorm.weight": "model-00002-of-00002.safetensors", + "model.layers.25.mlp.down_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.25.mlp.gate_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.25.mlp.up_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.25.post_attention_layernorm.weight": "model-00002-of-00002.safetensors", + "model.layers.25.self_attn.k_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.25.self_attn.o_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.25.self_attn.q_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.25.self_attn.v_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.26.input_layernorm.weight": "model-00002-of-00002.safetensors", + "model.layers.26.mlp.down_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.26.mlp.gate_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.26.mlp.up_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.26.post_attention_layernorm.weight": "model-00002-of-00002.safetensors", + "model.layers.26.self_attn.k_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.26.self_attn.o_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.26.self_attn.q_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.26.self_attn.v_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.27.input_layernorm.weight": "model-00002-of-00002.safetensors", + "model.layers.27.mlp.down_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.27.mlp.gate_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.27.mlp.up_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.27.post_attention_layernorm.weight": "model-00002-of-00002.safetensors", + "model.layers.27.self_attn.k_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.27.self_attn.o_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.27.self_attn.q_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.27.self_attn.v_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.3.input_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.3.mlp.down_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.3.mlp.gate_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.3.mlp.up_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.3.post_attention_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.3.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.3.self_attn.o_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.3.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.3.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.4.input_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.4.mlp.down_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.4.mlp.gate_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.4.mlp.up_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.4.post_attention_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.4.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.4.self_attn.o_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.4.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.4.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.5.input_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.5.mlp.down_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.5.mlp.gate_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.5.mlp.up_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.5.post_attention_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.5.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.5.self_attn.o_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.5.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.5.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.6.input_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.6.mlp.down_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.6.mlp.gate_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.6.mlp.up_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.6.post_attention_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.6.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.6.self_attn.o_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.6.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.6.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.7.input_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.7.mlp.down_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.7.mlp.gate_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.7.mlp.up_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.7.post_attention_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.7.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.7.self_attn.o_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.7.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.7.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.8.input_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.8.mlp.down_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.8.mlp.gate_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.8.mlp.up_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.8.post_attention_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.8.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.8.self_attn.o_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.8.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.8.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.9.input_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.9.mlp.down_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.9.mlp.gate_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.9.mlp.up_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.9.post_attention_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.9.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.9.self_attn.o_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.9.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.9.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.norm.weight": "model-00002-of-00002.safetensors" + } +} diff --git a/checkpoint-128/special_tokens_map.json b/checkpoint-128/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..b43be96621d147110fb8a18b5776ec6e38516127 --- /dev/null +++ b/checkpoint-128/special_tokens_map.json @@ -0,0 +1,17 @@ +{ + "bos_token": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": "<|eot_id|>" +} diff --git a/checkpoint-128/tokenizer.json b/checkpoint-128/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..1c1d8d5c9024994f1d3b00f9662b8dd89ca13cf2 --- /dev/null +++ b/checkpoint-128/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6b9e4e7fb171f92fd137b777cc2714bf87d11576700a1dcd7a399e7bbe39537b +size 17209920 diff --git a/checkpoint-128/tokenizer_config.json b/checkpoint-128/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..e3cdb7bfcc59fd293867d98e124a30b3ddf39c93 --- /dev/null +++ b/checkpoint-128/tokenizer_config.json @@ -0,0 +1,2063 @@ +{ + "added_tokens_decoder": { + "128000": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128001": { + "content": "<|end_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128002": { + "content": "<|reserved_special_token_0|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128003": { + "content": "<|reserved_special_token_1|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128004": { + "content": "<|finetune_right_pad_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128005": { + "content": "<|reserved_special_token_2|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128006": { + "content": "<|start_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128007": { + "content": "<|end_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128008": { + "content": "<|eom_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128009": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128010": { + "content": "<|python_tag|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128011": { + "content": "<|reserved_special_token_3|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128012": { + "content": "<|reserved_special_token_4|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128013": { + "content": "<|reserved_special_token_5|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128014": { + "content": "<|reserved_special_token_6|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128015": { + "content": "<|reserved_special_token_7|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128016": { + "content": "<|reserved_special_token_8|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128017": { + "content": "<|reserved_special_token_9|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128018": { + "content": "<|reserved_special_token_10|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128019": { + "content": "<|reserved_special_token_11|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128020": { + "content": "<|reserved_special_token_12|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128021": { + "content": "<|reserved_special_token_13|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128022": { + "content": "<|reserved_special_token_14|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128023": { + "content": "<|reserved_special_token_15|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128024": { + "content": "<|reserved_special_token_16|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128025": { + "content": "<|reserved_special_token_17|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128026": { + "content": "<|reserved_special_token_18|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128027": { + "content": "<|reserved_special_token_19|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128028": { + "content": "<|reserved_special_token_20|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128029": { + "content": "<|reserved_special_token_21|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128030": { + "content": "<|reserved_special_token_22|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128031": { + "content": "<|reserved_special_token_23|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128032": { + "content": "<|reserved_special_token_24|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128033": { + "content": "<|reserved_special_token_25|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128034": { + "content": "<|reserved_special_token_26|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128035": { + "content": "<|reserved_special_token_27|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128036": { + "content": "<|reserved_special_token_28|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128037": { + "content": "<|reserved_special_token_29|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128038": { + "content": "<|reserved_special_token_30|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128039": { + "content": "<|reserved_special_token_31|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128040": { + "content": "<|reserved_special_token_32|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128041": { + "content": "<|reserved_special_token_33|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128042": { + "content": "<|reserved_special_token_34|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128043": { + "content": "<|reserved_special_token_35|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128044": { + "content": "<|reserved_special_token_36|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128045": { + "content": "<|reserved_special_token_37|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128046": { + "content": "<|reserved_special_token_38|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128047": { + "content": "<|reserved_special_token_39|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128048": { + "content": "<|reserved_special_token_40|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128049": { + "content": "<|reserved_special_token_41|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128050": { + "content": "<|reserved_special_token_42|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128051": { + "content": "<|reserved_special_token_43|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128052": { + "content": "<|reserved_special_token_44|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128053": { + "content": "<|reserved_special_token_45|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128054": { + "content": "<|reserved_special_token_46|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128055": { + "content": "<|reserved_special_token_47|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128056": { + "content": "<|reserved_special_token_48|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128057": { + "content": "<|reserved_special_token_49|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128058": { + "content": "<|reserved_special_token_50|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128059": { + "content": "<|reserved_special_token_51|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128060": { + "content": "<|reserved_special_token_52|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128061": { + "content": "<|reserved_special_token_53|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128062": { + "content": "<|reserved_special_token_54|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128063": { + "content": "<|reserved_special_token_55|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128064": { + "content": "<|reserved_special_token_56|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128065": { + "content": "<|reserved_special_token_57|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128066": { + "content": "<|reserved_special_token_58|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128067": { + "content": "<|reserved_special_token_59|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128068": { + "content": "<|reserved_special_token_60|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128069": { + "content": "<|reserved_special_token_61|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128070": { + "content": "<|reserved_special_token_62|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128071": { + "content": "<|reserved_special_token_63|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128072": { + "content": "<|reserved_special_token_64|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128073": { + "content": "<|reserved_special_token_65|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128074": { + "content": "<|reserved_special_token_66|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128075": { + "content": "<|reserved_special_token_67|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128076": { + "content": "<|reserved_special_token_68|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128077": { + "content": "<|reserved_special_token_69|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128078": { + "content": "<|reserved_special_token_70|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128079": { + "content": "<|reserved_special_token_71|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128080": { + "content": "<|reserved_special_token_72|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128081": { + "content": "<|reserved_special_token_73|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128082": { + "content": "<|reserved_special_token_74|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128083": { + "content": "<|reserved_special_token_75|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128084": { + "content": "<|reserved_special_token_76|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128085": { + "content": "<|reserved_special_token_77|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128086": { + "content": "<|reserved_special_token_78|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128087": { + "content": "<|reserved_special_token_79|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128088": { + "content": "<|reserved_special_token_80|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128089": { + "content": "<|reserved_special_token_81|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128090": { + "content": "<|reserved_special_token_82|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128091": { + "content": "<|reserved_special_token_83|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128092": { + "content": "<|reserved_special_token_84|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128093": { + "content": "<|reserved_special_token_85|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128094": { + "content": "<|reserved_special_token_86|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128095": { + "content": "<|reserved_special_token_87|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128096": { + "content": "<|reserved_special_token_88|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128097": { + "content": "<|reserved_special_token_89|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128098": { + "content": "<|reserved_special_token_90|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128099": { + "content": "<|reserved_special_token_91|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128100": { + "content": "<|reserved_special_token_92|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128101": { + "content": "<|reserved_special_token_93|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128102": { + "content": "<|reserved_special_token_94|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128103": { + "content": "<|reserved_special_token_95|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128104": { + "content": "<|reserved_special_token_96|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128105": { + "content": "<|reserved_special_token_97|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128106": { + "content": "<|reserved_special_token_98|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128107": { + "content": "<|reserved_special_token_99|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128108": { + "content": "<|reserved_special_token_100|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128109": { + "content": "<|reserved_special_token_101|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128110": { + "content": "<|reserved_special_token_102|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128111": { + "content": "<|reserved_special_token_103|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128112": { + "content": "<|reserved_special_token_104|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128113": { + "content": "<|reserved_special_token_105|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128114": { + "content": "<|reserved_special_token_106|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128115": { + "content": "<|reserved_special_token_107|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128116": { + "content": "<|reserved_special_token_108|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128117": { + "content": "<|reserved_special_token_109|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128118": { + "content": "<|reserved_special_token_110|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128119": { + "content": "<|reserved_special_token_111|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128120": { + "content": "<|reserved_special_token_112|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128121": { + "content": "<|reserved_special_token_113|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128122": { + "content": "<|reserved_special_token_114|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128123": { + "content": "<|reserved_special_token_115|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128124": { + "content": "<|reserved_special_token_116|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128125": { + "content": "<|reserved_special_token_117|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128126": { + "content": "<|reserved_special_token_118|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128127": { + "content": "<|reserved_special_token_119|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128128": { + "content": "<|reserved_special_token_120|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128129": { + "content": "<|reserved_special_token_121|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128130": { + "content": "<|reserved_special_token_122|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128131": { + "content": "<|reserved_special_token_123|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128132": { + "content": "<|reserved_special_token_124|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128133": { + "content": "<|reserved_special_token_125|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128134": { + "content": "<|reserved_special_token_126|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128135": { + "content": "<|reserved_special_token_127|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128136": { + "content": "<|reserved_special_token_128|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128137": { + "content": "<|reserved_special_token_129|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128138": { + "content": "<|reserved_special_token_130|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128139": { + "content": "<|reserved_special_token_131|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128140": { + "content": "<|reserved_special_token_132|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128141": { + "content": "<|reserved_special_token_133|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128142": { + "content": "<|reserved_special_token_134|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128143": { + "content": "<|reserved_special_token_135|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128144": { + "content": "<|reserved_special_token_136|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128145": { + "content": "<|reserved_special_token_137|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128146": { + "content": "<|reserved_special_token_138|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128147": { + "content": "<|reserved_special_token_139|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128148": { + "content": "<|reserved_special_token_140|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128149": { + "content": "<|reserved_special_token_141|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128150": { + "content": "<|reserved_special_token_142|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128151": { + "content": "<|reserved_special_token_143|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128152": { + "content": "<|reserved_special_token_144|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128153": { + "content": "<|reserved_special_token_145|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128154": { + "content": "<|reserved_special_token_146|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128155": { + "content": "<|reserved_special_token_147|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128156": { + "content": "<|reserved_special_token_148|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128157": { + "content": "<|reserved_special_token_149|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128158": { + "content": "<|reserved_special_token_150|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128159": { + "content": "<|reserved_special_token_151|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128160": { + "content": "<|reserved_special_token_152|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128161": { + "content": "<|reserved_special_token_153|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128162": { + "content": "<|reserved_special_token_154|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128163": { + "content": "<|reserved_special_token_155|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128164": { + "content": "<|reserved_special_token_156|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128165": { + "content": "<|reserved_special_token_157|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128166": { + "content": "<|reserved_special_token_158|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128167": { + "content": "<|reserved_special_token_159|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128168": { + "content": "<|reserved_special_token_160|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128169": { + "content": "<|reserved_special_token_161|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128170": { + "content": "<|reserved_special_token_162|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128171": { + "content": "<|reserved_special_token_163|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128172": { + "content": "<|reserved_special_token_164|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128173": { + "content": "<|reserved_special_token_165|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128174": { + "content": "<|reserved_special_token_166|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128175": { + "content": "<|reserved_special_token_167|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128176": { + "content": "<|reserved_special_token_168|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128177": { + "content": "<|reserved_special_token_169|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128178": { + "content": "<|reserved_special_token_170|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128179": { + "content": "<|reserved_special_token_171|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128180": { + "content": "<|reserved_special_token_172|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128181": { + "content": "<|reserved_special_token_173|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128182": { + "content": "<|reserved_special_token_174|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128183": { + "content": "<|reserved_special_token_175|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128184": { + "content": "<|reserved_special_token_176|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128185": { + "content": "<|reserved_special_token_177|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128186": { + "content": "<|reserved_special_token_178|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128187": { + "content": "<|reserved_special_token_179|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128188": { + "content": "<|reserved_special_token_180|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128189": { + "content": "<|reserved_special_token_181|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128190": { + "content": "<|reserved_special_token_182|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128191": { + "content": "<|reserved_special_token_183|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128192": { + "content": "<|reserved_special_token_184|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128193": { + "content": "<|reserved_special_token_185|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128194": { + "content": "<|reserved_special_token_186|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128195": { + "content": "<|reserved_special_token_187|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128196": { + "content": "<|reserved_special_token_188|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128197": { + "content": "<|reserved_special_token_189|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128198": { + "content": "<|reserved_special_token_190|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128199": { + "content": "<|reserved_special_token_191|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128200": { + "content": "<|reserved_special_token_192|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128201": { + "content": "<|reserved_special_token_193|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128202": { + "content": "<|reserved_special_token_194|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128203": { + "content": "<|reserved_special_token_195|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128204": { + "content": "<|reserved_special_token_196|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128205": { + "content": "<|reserved_special_token_197|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128206": { + "content": "<|reserved_special_token_198|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128207": { + "content": "<|reserved_special_token_199|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128208": { + "content": "<|reserved_special_token_200|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128209": { + "content": "<|reserved_special_token_201|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128210": { + "content": "<|reserved_special_token_202|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128211": { + "content": "<|reserved_special_token_203|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128212": { + "content": "<|reserved_special_token_204|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128213": { + "content": "<|reserved_special_token_205|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128214": { + "content": "<|reserved_special_token_206|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128215": { + "content": "<|reserved_special_token_207|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128216": { + "content": "<|reserved_special_token_208|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128217": { + "content": "<|reserved_special_token_209|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128218": { + "content": "<|reserved_special_token_210|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128219": { + "content": "<|reserved_special_token_211|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128220": { + "content": "<|reserved_special_token_212|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128221": { + "content": "<|reserved_special_token_213|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128222": { + "content": "<|reserved_special_token_214|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128223": { + "content": "<|reserved_special_token_215|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128224": { + "content": "<|reserved_special_token_216|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128225": { + "content": "<|reserved_special_token_217|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128226": { + "content": "<|reserved_special_token_218|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128227": { + "content": "<|reserved_special_token_219|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128228": { + "content": "<|reserved_special_token_220|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128229": { + "content": "<|reserved_special_token_221|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128230": { + "content": "<|reserved_special_token_222|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128231": { + "content": "<|reserved_special_token_223|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128232": { + "content": "<|reserved_special_token_224|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128233": { + "content": "<|reserved_special_token_225|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128234": { + "content": "<|reserved_special_token_226|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128235": { + "content": "<|reserved_special_token_227|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128236": { + "content": "<|reserved_special_token_228|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128237": { + "content": "<|reserved_special_token_229|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128238": { + "content": "<|reserved_special_token_230|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128239": { + "content": "<|reserved_special_token_231|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128240": { + "content": "<|reserved_special_token_232|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128241": { + "content": "<|reserved_special_token_233|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128242": { + "content": "<|reserved_special_token_234|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128243": { + "content": "<|reserved_special_token_235|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128244": { + "content": "<|reserved_special_token_236|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128245": { + "content": "<|reserved_special_token_237|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128246": { + "content": "<|reserved_special_token_238|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128247": { + "content": "<|reserved_special_token_239|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128248": { + "content": "<|reserved_special_token_240|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128249": { + "content": "<|reserved_special_token_241|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128250": { + "content": "<|reserved_special_token_242|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128251": { + "content": "<|reserved_special_token_243|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128252": { + "content": "<|reserved_special_token_244|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128253": { + "content": "<|reserved_special_token_245|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128254": { + "content": "<|reserved_special_token_246|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128255": { + "content": "<|reserved_special_token_247|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + } + }, + "bos_token": "<|begin_of_text|>", + "chat_template": "{{- bos_token }}\n{%- if custom_tools is defined %}\n {%- set tools = custom_tools %}\n{%- endif %}\n{%- if not tools_in_user_message is defined %}\n {%- set tools_in_user_message = true %}\n{%- endif %}\n{%- if not date_string is defined %}\n {%- if strftime_now is defined %}\n {%- set date_string = strftime_now(\"%d %b %Y\") %}\n {%- else %}\n {%- set date_string = \"26 Jul 2024\" %}\n {%- endif %}\n{%- endif %}\n{%- if not tools is defined %}\n {%- set tools = none %}\n{%- endif %}\n\n{#- This block extracts the system message, so we can slot it into the right place. #}\n{%- if messages[0]['role'] == 'system' %}\n {%- set system_message = messages[0]['content']|trim %}\n {%- set messages = messages[1:] %}\n{%- else %}\n {%- set system_message = \"\" %}\n{%- endif %}\n\n{#- System message #}\n{{- \"<|start_header_id|>system<|end_header_id|>\\n\\n\" }}\n{%- if tools is not none %}\n {{- \"Environment: ipython\\n\" }}\n{%- endif %}\n{{- \"Cutting Knowledge Date: December 2023\\n\" }}\n{{- \"Today Date: \" + date_string + \"\\n\\n\" }}\n{%- if tools is not none and not tools_in_user_message %}\n {{- \"You have access to the following functions. To call a function, please respond with JSON for a function call.\" }}\n {{- 'Respond in the format {\"name\": function name, \"parameters\": dictionary of argument name and its value}.' }}\n {{- \"Do not use variables.\\n\\n\" }}\n {%- for t in tools %}\n {{- t | tojson(indent=4) }}\n {{- \"\\n\\n\" }}\n {%- endfor %}\n{%- endif %}\n{{- system_message }}\n{{- \"<|eot_id|>\" }}\n\n{#- Custom tools are passed in a user message with some extra guidance #}\n{%- if tools_in_user_message and not tools is none %}\n {#- Extract the first user message so we can plug it in here #}\n {%- if messages | length != 0 %}\n {%- set first_user_message = messages[0]['content']|trim %}\n {%- set messages = messages[1:] %}\n {%- else %}\n {{- raise_exception(\"Cannot put tools in the first user message when there's no first user message!\") }}\n{%- endif %}\n {{- '<|start_header_id|>user<|end_header_id|>\\n\\n' -}}\n {{- \"Given the following functions, please respond with a JSON for a function call \" }}\n {{- \"with its proper arguments that best answers the given prompt.\\n\\n\" }}\n {{- 'Respond in the format {\"name\": function name, \"parameters\": dictionary of argument name and its value}.' }}\n {{- \"Do not use variables.\\n\\n\" }}\n {%- for t in tools %}\n {{- t | tojson(indent=4) }}\n {{- \"\\n\\n\" }}\n {%- endfor %}\n {{- first_user_message + \"<|eot_id|>\"}}\n{%- endif %}\n\n{%- for message in messages %}\n {%- if not (message.role == 'ipython' or message.role == 'tool' or 'tool_calls' in message) %}\n {{- '<|start_header_id|>' + message['role'] + '<|end_header_id|>\\n\\n'+ message['content'] | trim + '<|eot_id|>' }}\n {%- elif 'tool_calls' in message %}\n {%- if not message.tool_calls|length == 1 %}\n {{- raise_exception(\"This model only supports single tool-calls at once!\") }}\n {%- endif %}\n {%- set tool_call = message.tool_calls[0].function %}\n {{- '<|start_header_id|>assistant<|end_header_id|>\\n\\n' -}}\n {{- '{\"name\": \"' + tool_call.name + '\", ' }}\n {{- '\"parameters\": ' }}\n {{- tool_call.arguments | tojson }}\n {{- \"}\" }}\n {{- \"<|eot_id|>\" }}\n {%- elif message.role == \"tool\" or message.role == \"ipython\" %}\n {{- \"<|start_header_id|>ipython<|end_header_id|>\\n\\n\" }}\n {%- if message.content is mapping or message.content is iterable %}\n {{- message.content | tojson }}\n {%- else %}\n {{- message.content }}\n {%- endif %}\n {{- \"<|eot_id|>\" }}\n {%- endif %}\n{%- endfor %}\n{%- if add_generation_prompt %}\n {{- '<|start_header_id|>assistant<|end_header_id|>\\n\\n' }}\n{%- endif %}\n", + "clean_up_tokenization_spaces": true, + "eos_token": "<|eot_id|>", + "model_input_names": [ + "input_ids", + "attention_mask" + ], + "model_max_length": 2048, + "pad_token": "<|eot_id|>", + "tokenizer_class": "PreTrainedTokenizerFast" +} diff --git a/checkpoint-128/trainer_state.json b/checkpoint-128/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..c0f079a377dbba76add4dbd983ac6214102e161a --- /dev/null +++ b/checkpoint-128/trainer_state.json @@ -0,0 +1,1953 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 0.2706309877766766, + "eval_steps": 500, + "global_step": 128, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.002114304592005286, + "grad_norm": 1.5018059015274048, + "learning_rate": 2.083333333333333e-08, + "logits/chosen": -0.3466828167438507, + "logits/rejected": -0.30099987983703613, + "logps/chosen": -0.9345186948776245, + "logps/rejected": -0.9117153882980347, + "loss": 1.4889, + "rewards/accuracies": 0.53125, + "rewards/chosen": -1.869037389755249, + "rewards/margins": -0.04560665041208267, + "rewards/rejected": -1.8234307765960693, + "step": 1 + }, + { + "epoch": 0.004228609184010572, + "grad_norm": 0.8093975186347961, + "learning_rate": 4.166666666666666e-08, + "logits/chosen": -0.4310421049594879, + "logits/rejected": -0.39132067561149597, + "logps/chosen": -0.8198825716972351, + "logps/rejected": -0.8644211888313293, + "loss": 1.376, + "rewards/accuracies": 0.515625, + "rewards/chosen": -1.6397651433944702, + "rewards/margins": 0.08907715976238251, + "rewards/rejected": -1.7288423776626587, + "step": 2 + }, + { + "epoch": 0.006342913776015857, + "grad_norm": 0.5377389788627625, + "learning_rate": 6.25e-08, + "logits/chosen": -0.46692028641700745, + "logits/rejected": -0.4649256467819214, + "logps/chosen": -0.9087910652160645, + "logps/rejected": -0.9648240804672241, + "loss": 1.3404, + "rewards/accuracies": 0.5703125, + "rewards/chosen": -1.817582130432129, + "rewards/margins": 0.11206617206335068, + "rewards/rejected": -1.9296481609344482, + "step": 3 + }, + { + "epoch": 0.008457218368021144, + "grad_norm": 0.3221875727176666, + "learning_rate": 8.333333333333333e-08, + "logits/chosen": -0.416828453540802, + "logits/rejected": -0.3584724962711334, + "logps/chosen": -0.7818898558616638, + "logps/rejected": -0.8170815110206604, + "loss": 1.3806, + "rewards/accuracies": 0.484375, + "rewards/chosen": -1.5637797117233276, + "rewards/margins": 0.07038339227437973, + "rewards/rejected": -1.6341630220413208, + "step": 4 + }, + { + "epoch": 0.010571522960026428, + "grad_norm": 0.64655601978302, + "learning_rate": 1.0416666666666667e-07, + "logits/chosen": -0.376886248588562, + "logits/rejected": -0.3516141474246979, + "logps/chosen": -0.8814125061035156, + "logps/rejected": -1.0214396715164185, + "loss": 1.2741, + "rewards/accuracies": 0.625, + "rewards/chosen": -1.7628250122070312, + "rewards/margins": 0.28005433082580566, + "rewards/rejected": -2.042879343032837, + "step": 5 + }, + { + "epoch": 0.012685827552031714, + "grad_norm": 0.4775894582271576, + "learning_rate": 1.25e-07, + "logits/chosen": -0.4757865369319916, + "logits/rejected": -0.4498941898345947, + "logps/chosen": -0.8962199687957764, + "logps/rejected": -0.9462199807167053, + "loss": 1.364, + "rewards/accuracies": 0.5625, + "rewards/chosen": -1.7924399375915527, + "rewards/margins": 0.10000008344650269, + "rewards/rejected": -1.8924399614334106, + "step": 6 + }, + { + "epoch": 0.014800132144037, + "grad_norm": 1.2459568977355957, + "learning_rate": 1.4583333333333335e-07, + "logits/chosen": -0.38895344734191895, + "logits/rejected": -0.38165366649627686, + "logps/chosen": -0.9025766253471375, + "logps/rejected": -0.9465017318725586, + "loss": 1.3898, + "rewards/accuracies": 0.59375, + "rewards/chosen": -1.805153250694275, + "rewards/margins": 0.0878501906991005, + "rewards/rejected": -1.8930034637451172, + "step": 7 + }, + { + "epoch": 0.016914436736042288, + "grad_norm": 0.6195729374885559, + "learning_rate": 1.6666666666666665e-07, + "logits/chosen": -0.3964853286743164, + "logits/rejected": -0.377862811088562, + "logps/chosen": -0.9054160118103027, + "logps/rejected": -0.9605879187583923, + "loss": 1.3821, + "rewards/accuracies": 0.4765625, + "rewards/chosen": -1.8108320236206055, + "rewards/margins": 0.1103438138961792, + "rewards/rejected": -1.9211758375167847, + "step": 8 + }, + { + "epoch": 0.019028741328047574, + "grad_norm": 1.2074137926101685, + "learning_rate": 1.875e-07, + "logits/chosen": -0.3729037344455719, + "logits/rejected": -0.38143450021743774, + "logps/chosen": -0.9328653216362, + "logps/rejected": -0.9905799627304077, + "loss": 1.3754, + "rewards/accuracies": 0.5078125, + "rewards/chosen": -1.8657306432724, + "rewards/margins": 0.11542946100234985, + "rewards/rejected": -1.9811599254608154, + "step": 9 + }, + { + "epoch": 0.021143045920052856, + "grad_norm": 0.2867220640182495, + "learning_rate": 2.0833333333333333e-07, + "logits/chosen": -0.4263336658477783, + "logits/rejected": -0.42903271317481995, + "logps/chosen": -0.8979260325431824, + "logps/rejected": -0.9078099727630615, + "loss": 1.4438, + "rewards/accuracies": 0.515625, + "rewards/chosen": -1.7958520650863647, + "rewards/margins": 0.019767940044403076, + "rewards/rejected": -1.815619945526123, + "step": 10 + }, + { + "epoch": 0.023257350512058142, + "grad_norm": 0.8363026976585388, + "learning_rate": 2.2916666666666663e-07, + "logits/chosen": -0.3374914526939392, + "logits/rejected": -0.32399696111679077, + "logps/chosen": -0.8886098861694336, + "logps/rejected": -0.9484556317329407, + "loss": 1.3422, + "rewards/accuracies": 0.546875, + "rewards/chosen": -1.7772197723388672, + "rewards/margins": 0.11969132721424103, + "rewards/rejected": -1.8969112634658813, + "step": 11 + }, + { + "epoch": 0.025371655104063428, + "grad_norm": 0.5406804084777832, + "learning_rate": 2.5e-07, + "logits/chosen": -0.42844679951667786, + "logits/rejected": -0.37984615564346313, + "logps/chosen": -0.861629843711853, + "logps/rejected": -0.8968492150306702, + "loss": 1.3922, + "rewards/accuracies": 0.5, + "rewards/chosen": -1.723259687423706, + "rewards/margins": 0.07043875753879547, + "rewards/rejected": -1.7936984300613403, + "step": 12 + }, + { + "epoch": 0.027485959696068714, + "grad_norm": 0.9919329285621643, + "learning_rate": 2.708333333333333e-07, + "logits/chosen": -0.36495402455329895, + "logits/rejected": -0.3249490261077881, + "logps/chosen": -0.8502095937728882, + "logps/rejected": -0.8470643758773804, + "loss": 1.4334, + "rewards/accuracies": 0.5078125, + "rewards/chosen": -1.7004191875457764, + "rewards/margins": -0.006290358491241932, + "rewards/rejected": -1.6941287517547607, + "step": 13 + }, + { + "epoch": 0.029600264288074, + "grad_norm": 0.5477162003517151, + "learning_rate": 2.916666666666667e-07, + "logits/chosen": -0.4155704081058502, + "logits/rejected": -0.39535820484161377, + "logps/chosen": -1.0430240631103516, + "logps/rejected": -1.1318373680114746, + "loss": 1.3533, + "rewards/accuracies": 0.6015625, + "rewards/chosen": -2.086048126220703, + "rewards/margins": 0.17762640118598938, + "rewards/rejected": -2.263674736022949, + "step": 14 + }, + { + "epoch": 0.031714568880079286, + "grad_norm": 0.26530712842941284, + "learning_rate": 3.1249999999999997e-07, + "logits/chosen": -0.4810572564601898, + "logits/rejected": -0.42454615235328674, + "logps/chosen": -0.8741041421890259, + "logps/rejected": -0.9494178295135498, + "loss": 1.3655, + "rewards/accuracies": 0.5, + "rewards/chosen": -1.7482082843780518, + "rewards/margins": 0.15062758326530457, + "rewards/rejected": -1.8988356590270996, + "step": 15 + }, + { + "epoch": 0.033828873472084575, + "grad_norm": 0.9272629618644714, + "learning_rate": 3.333333333333333e-07, + "logits/chosen": -0.4440098702907562, + "logits/rejected": -0.3930297791957855, + "logps/chosen": -0.8473359942436218, + "logps/rejected": -0.9369213581085205, + "loss": 1.3248, + "rewards/accuracies": 0.5625, + "rewards/chosen": -1.6946719884872437, + "rewards/margins": 0.17917080223560333, + "rewards/rejected": -1.873842716217041, + "step": 16 + }, + { + "epoch": 0.03594317806408986, + "grad_norm": 0.5912418961524963, + "learning_rate": 3.541666666666667e-07, + "logits/chosen": -0.3838099539279938, + "logits/rejected": -0.3507584035396576, + "logps/chosen": -0.8888350129127502, + "logps/rejected": -0.9361770749092102, + "loss": 1.383, + "rewards/accuracies": 0.4921875, + "rewards/chosen": -1.7776700258255005, + "rewards/margins": 0.0946839451789856, + "rewards/rejected": -1.8723541498184204, + "step": 17 + }, + { + "epoch": 0.03805748265609515, + "grad_norm": 0.6536504030227661, + "learning_rate": 3.75e-07, + "logits/chosen": -0.3581697940826416, + "logits/rejected": -0.3620460629463196, + "logps/chosen": -0.8519617319107056, + "logps/rejected": -0.9022184610366821, + "loss": 1.3841, + "rewards/accuracies": 0.5078125, + "rewards/chosen": -1.7039234638214111, + "rewards/margins": 0.10051343590021133, + "rewards/rejected": -1.8044369220733643, + "step": 18 + }, + { + "epoch": 0.04017178724810043, + "grad_norm": 0.3433632552623749, + "learning_rate": 3.958333333333333e-07, + "logits/chosen": -0.37887442111968994, + "logits/rejected": -0.37543320655822754, + "logps/chosen": -0.9464104175567627, + "logps/rejected": -1.0017329454421997, + "loss": 1.3649, + "rewards/accuracies": 0.5703125, + "rewards/chosen": -1.8928208351135254, + "rewards/margins": 0.11064518243074417, + "rewards/rejected": -2.0034658908843994, + "step": 19 + }, + { + "epoch": 0.04228609184010571, + "grad_norm": 0.9764007329940796, + "learning_rate": 4.1666666666666667e-07, + "logits/chosen": -0.44110679626464844, + "logits/rejected": -0.4280649721622467, + "logps/chosen": -0.9046768546104431, + "logps/rejected": -1.0464633703231812, + "loss": 1.2592, + "rewards/accuracies": 0.625, + "rewards/chosen": -1.8093537092208862, + "rewards/margins": 0.2835729420185089, + "rewards/rejected": -2.0929267406463623, + "step": 20 + }, + { + "epoch": 0.044400396432111, + "grad_norm": 1.8563830852508545, + "learning_rate": 4.375e-07, + "logits/chosen": -0.45183491706848145, + "logits/rejected": -0.42935287952423096, + "logps/chosen": -0.9043138027191162, + "logps/rejected": -0.9462392926216125, + "loss": 1.3784, + "rewards/accuracies": 0.5625, + "rewards/chosen": -1.8086276054382324, + "rewards/margins": 0.08385094255208969, + "rewards/rejected": -1.892478585243225, + "step": 21 + }, + { + "epoch": 0.046514701024116284, + "grad_norm": 1.3473299741744995, + "learning_rate": 4.5833333333333327e-07, + "logits/chosen": -0.37855517864227295, + "logits/rejected": -0.34429043531417847, + "logps/chosen": -0.9284683465957642, + "logps/rejected": -0.9454050064086914, + "loss": 1.4346, + "rewards/accuracies": 0.5234375, + "rewards/chosen": -1.8569366931915283, + "rewards/margins": 0.03387312963604927, + "rewards/rejected": -1.8908100128173828, + "step": 22 + }, + { + "epoch": 0.04862900561612157, + "grad_norm": 0.940831184387207, + "learning_rate": 4.791666666666667e-07, + "logits/chosen": -0.39172160625457764, + "logits/rejected": -0.3695780634880066, + "logps/chosen": -0.9314202666282654, + "logps/rejected": -1.020229697227478, + "loss": 1.3322, + "rewards/accuracies": 0.5703125, + "rewards/chosen": -1.8628405332565308, + "rewards/margins": 0.17761869728565216, + "rewards/rejected": -2.040459394454956, + "step": 23 + }, + { + "epoch": 0.050743310208126856, + "grad_norm": 0.5783158540725708, + "learning_rate": 5e-07, + "logits/chosen": -0.4958629608154297, + "logits/rejected": -0.4257377088069916, + "logps/chosen": -0.9379237294197083, + "logps/rejected": -0.9415461421012878, + "loss": 1.441, + "rewards/accuracies": 0.515625, + "rewards/chosen": -1.8758474588394165, + "rewards/margins": 0.0072449808940291405, + "rewards/rejected": -1.8830922842025757, + "step": 24 + }, + { + "epoch": 0.052857614800132145, + "grad_norm": 1.4209853410720825, + "learning_rate": 5.208333333333334e-07, + "logits/chosen": -0.36407172679901123, + "logits/rejected": -0.3331725299358368, + "logps/chosen": -0.9192589521408081, + "logps/rejected": -0.9595308899879456, + "loss": 1.3994, + "rewards/accuracies": 0.5078125, + "rewards/chosen": -1.8385179042816162, + "rewards/margins": 0.080544114112854, + "rewards/rejected": -1.9190617799758911, + "step": 25 + }, + { + "epoch": 0.05497191939213743, + "grad_norm": 0.6310216188430786, + "learning_rate": 5.416666666666666e-07, + "logits/chosen": -0.41772690415382385, + "logits/rejected": -0.36565953493118286, + "logps/chosen": -0.8052878379821777, + "logps/rejected": -0.8673746585845947, + "loss": 1.3356, + "rewards/accuracies": 0.5859375, + "rewards/chosen": -1.6105756759643555, + "rewards/margins": 0.12417369335889816, + "rewards/rejected": -1.7347493171691895, + "step": 26 + }, + { + "epoch": 0.05708622398414272, + "grad_norm": 1.2933462858200073, + "learning_rate": 5.625e-07, + "logits/chosen": -0.4482795000076294, + "logits/rejected": -0.39409321546554565, + "logps/chosen": -0.8339261412620544, + "logps/rejected": -0.8675202131271362, + "loss": 1.3739, + "rewards/accuracies": 0.4921875, + "rewards/chosen": -1.6678522825241089, + "rewards/margins": 0.06718815863132477, + "rewards/rejected": -1.7350404262542725, + "step": 27 + }, + { + "epoch": 0.059200528576148, + "grad_norm": 0.5808025002479553, + "learning_rate": 5.833333333333334e-07, + "logits/chosen": -0.37116044759750366, + "logits/rejected": -0.3478051722049713, + "logps/chosen": -0.8950318694114685, + "logps/rejected": -0.9756672978401184, + "loss": 1.3505, + "rewards/accuracies": 0.5625, + "rewards/chosen": -1.790063738822937, + "rewards/margins": 0.1612708568572998, + "rewards/rejected": -1.9513345956802368, + "step": 28 + }, + { + "epoch": 0.06131483316815329, + "grad_norm": 1.0569533109664917, + "learning_rate": 6.041666666666666e-07, + "logits/chosen": -0.421148419380188, + "logits/rejected": -0.38443076610565186, + "logps/chosen": -0.8021283745765686, + "logps/rejected": -0.8370179533958435, + "loss": 1.3916, + "rewards/accuracies": 0.46875, + "rewards/chosen": -1.6042567491531372, + "rewards/margins": 0.06977920234203339, + "rewards/rejected": -1.674035906791687, + "step": 29 + }, + { + "epoch": 0.06342913776015857, + "grad_norm": 0.42577147483825684, + "learning_rate": 6.249999999999999e-07, + "logits/chosen": -0.4429818391799927, + "logits/rejected": -0.3524704575538635, + "logps/chosen": -0.8916822671890259, + "logps/rejected": -0.8985542058944702, + "loss": 1.4321, + "rewards/accuracies": 0.5234375, + "rewards/chosen": -1.7833645343780518, + "rewards/margins": 0.01374388113617897, + "rewards/rejected": -1.7971084117889404, + "step": 30 + }, + { + "epoch": 0.06554344235216386, + "grad_norm": 1.0056904554367065, + "learning_rate": 6.458333333333333e-07, + "logits/chosen": -0.376451700925827, + "logits/rejected": -0.342519074678421, + "logps/chosen": -0.9038617014884949, + "logps/rejected": -0.953092634677887, + "loss": 1.398, + "rewards/accuracies": 0.4765625, + "rewards/chosen": -1.8077234029769897, + "rewards/margins": 0.09846188127994537, + "rewards/rejected": -1.906185269355774, + "step": 31 + }, + { + "epoch": 0.06765774694416915, + "grad_norm": 0.5494012236595154, + "learning_rate": 6.666666666666666e-07, + "logits/chosen": -0.3459138870239258, + "logits/rejected": -0.3590989410877228, + "logps/chosen": -0.8274999260902405, + "logps/rejected": -0.8776509761810303, + "loss": 1.363, + "rewards/accuracies": 0.5625, + "rewards/chosen": -1.654999852180481, + "rewards/margins": 0.1003020703792572, + "rewards/rejected": -1.7553019523620605, + "step": 32 + }, + { + "epoch": 0.06977205153617443, + "grad_norm": 0.693267822265625, + "learning_rate": 6.875e-07, + "logits/chosen": -0.40053680539131165, + "logits/rejected": -0.37323904037475586, + "logps/chosen": -0.8255244493484497, + "logps/rejected": -0.8658804893493652, + "loss": 1.3712, + "rewards/accuracies": 0.578125, + "rewards/chosen": -1.6510488986968994, + "rewards/margins": 0.08071210980415344, + "rewards/rejected": -1.7317609786987305, + "step": 33 + }, + { + "epoch": 0.07188635612817972, + "grad_norm": 2.213238000869751, + "learning_rate": 7.083333333333334e-07, + "logits/chosen": -0.40097948908805847, + "logits/rejected": -0.38190510869026184, + "logps/chosen": -0.9122671484947205, + "logps/rejected": -0.9549552798271179, + "loss": 1.36, + "rewards/accuracies": 0.5390625, + "rewards/chosen": -1.824534296989441, + "rewards/margins": 0.0853763073682785, + "rewards/rejected": -1.9099105596542358, + "step": 34 + }, + { + "epoch": 0.074000660720185, + "grad_norm": 0.6859830021858215, + "learning_rate": 7.291666666666666e-07, + "logits/chosen": -0.42501094937324524, + "logits/rejected": -0.42549416422843933, + "logps/chosen": -1.0008373260498047, + "logps/rejected": -1.1157118082046509, + "loss": 1.3294, + "rewards/accuracies": 0.5546875, + "rewards/chosen": -2.0016746520996094, + "rewards/margins": 0.22974897921085358, + "rewards/rejected": -2.2314236164093018, + "step": 35 + }, + { + "epoch": 0.0761149653121903, + "grad_norm": 0.6468721628189087, + "learning_rate": 7.5e-07, + "logits/chosen": -0.36494994163513184, + "logits/rejected": -0.30433908104896545, + "logps/chosen": -0.9062094688415527, + "logps/rejected": -0.920263409614563, + "loss": 1.4312, + "rewards/accuracies": 0.4921875, + "rewards/chosen": -1.8124189376831055, + "rewards/margins": 0.02810765616595745, + "rewards/rejected": -1.840526819229126, + "step": 36 + }, + { + "epoch": 0.07822926990419557, + "grad_norm": 0.5085556507110596, + "learning_rate": 7.708333333333333e-07, + "logits/chosen": -0.4677881598472595, + "logits/rejected": -0.456132709980011, + "logps/chosen": -1.0101865530014038, + "logps/rejected": -1.0429682731628418, + "loss": 1.4132, + "rewards/accuracies": 0.5234375, + "rewards/chosen": -2.0203731060028076, + "rewards/margins": 0.06556359678506851, + "rewards/rejected": -2.0859365463256836, + "step": 37 + }, + { + "epoch": 0.08034357449620086, + "grad_norm": 0.23813335597515106, + "learning_rate": 7.916666666666666e-07, + "logits/chosen": -0.3991190791130066, + "logits/rejected": -0.3664044141769409, + "logps/chosen": -0.9578174352645874, + "logps/rejected": -0.9229263067245483, + "loss": 1.4824, + "rewards/accuracies": 0.4609375, + "rewards/chosen": -1.9156348705291748, + "rewards/margins": -0.06978224962949753, + "rewards/rejected": -1.8458526134490967, + "step": 38 + }, + { + "epoch": 0.08245787908820615, + "grad_norm": 0.587037980556488, + "learning_rate": 8.125e-07, + "logits/chosen": -0.37554049491882324, + "logits/rejected": -0.36305734515190125, + "logps/chosen": -0.8503091931343079, + "logps/rejected": -0.864615261554718, + "loss": 1.4086, + "rewards/accuracies": 0.4296875, + "rewards/chosen": -1.7006183862686157, + "rewards/margins": 0.028611989691853523, + "rewards/rejected": -1.729230523109436, + "step": 39 + }, + { + "epoch": 0.08457218368021142, + "grad_norm": 0.4172501862049103, + "learning_rate": 8.333333333333333e-07, + "logits/chosen": -0.4405443072319031, + "logits/rejected": -0.41723060607910156, + "logps/chosen": -0.8502858877182007, + "logps/rejected": -0.9114271402359009, + "loss": 1.3446, + "rewards/accuracies": 0.546875, + "rewards/chosen": -1.7005717754364014, + "rewards/margins": 0.12228240817785263, + "rewards/rejected": -1.8228542804718018, + "step": 40 + }, + { + "epoch": 0.08668648827221671, + "grad_norm": 0.9275372624397278, + "learning_rate": 8.541666666666666e-07, + "logits/chosen": -0.4200601577758789, + "logits/rejected": -0.3478623628616333, + "logps/chosen": -0.892408013343811, + "logps/rejected": -0.9276402592658997, + "loss": 1.3887, + "rewards/accuracies": 0.46875, + "rewards/chosen": -1.784816026687622, + "rewards/margins": 0.07046431303024292, + "rewards/rejected": -1.8552805185317993, + "step": 41 + }, + { + "epoch": 0.088800792864222, + "grad_norm": 0.7317383289337158, + "learning_rate": 8.75e-07, + "logits/chosen": -0.37675267457962036, + "logits/rejected": -0.33540332317352295, + "logps/chosen": -0.7866061925888062, + "logps/rejected": -0.824250340461731, + "loss": 1.3837, + "rewards/accuracies": 0.5078125, + "rewards/chosen": -1.5732123851776123, + "rewards/margins": 0.07528844475746155, + "rewards/rejected": -1.648500680923462, + "step": 42 + }, + { + "epoch": 0.09091509745622729, + "grad_norm": 0.9452736973762512, + "learning_rate": 8.958333333333334e-07, + "logits/chosen": -0.4662383198738098, + "logits/rejected": -0.4447881579399109, + "logps/chosen": -0.9490666389465332, + "logps/rejected": -1.0112388134002686, + "loss": 1.3412, + "rewards/accuracies": 0.5390625, + "rewards/chosen": -1.8981332778930664, + "rewards/margins": 0.12434446066617966, + "rewards/rejected": -2.022477626800537, + "step": 43 + }, + { + "epoch": 0.09302940204823257, + "grad_norm": 0.2848323881626129, + "learning_rate": 9.166666666666665e-07, + "logits/chosen": -0.41404005885124207, + "logits/rejected": -0.3944583535194397, + "logps/chosen": -0.8224930167198181, + "logps/rejected": -0.8416361808776855, + "loss": 1.4027, + "rewards/accuracies": 0.609375, + "rewards/chosen": -1.6449860334396362, + "rewards/margins": 0.038286346942186356, + "rewards/rejected": -1.683272361755371, + "step": 44 + }, + { + "epoch": 0.09514370664023786, + "grad_norm": 0.7165678143501282, + "learning_rate": 9.374999999999999e-07, + "logits/chosen": -0.40475326776504517, + "logits/rejected": -0.3559921383857727, + "logps/chosen": -0.8070214986801147, + "logps/rejected": -0.8993593454360962, + "loss": 1.3148, + "rewards/accuracies": 0.53125, + "rewards/chosen": -1.6140429973602295, + "rewards/margins": 0.18467575311660767, + "rewards/rejected": -1.7987186908721924, + "step": 45 + }, + { + "epoch": 0.09725801123224315, + "grad_norm": 0.4779021739959717, + "learning_rate": 9.583333333333334e-07, + "logits/chosen": -0.4171525835990906, + "logits/rejected": -0.42166149616241455, + "logps/chosen": -0.7872560024261475, + "logps/rejected": -0.8496187925338745, + "loss": 1.3356, + "rewards/accuracies": 0.5546875, + "rewards/chosen": -1.574512004852295, + "rewards/margins": 0.12472567707300186, + "rewards/rejected": -1.699237585067749, + "step": 46 + }, + { + "epoch": 0.09937231582424844, + "grad_norm": 0.7870219349861145, + "learning_rate": 9.791666666666667e-07, + "logits/chosen": -0.3734116554260254, + "logits/rejected": -0.32778748869895935, + "logps/chosen": -0.7842286825180054, + "logps/rejected": -0.8161548972129822, + "loss": 1.3647, + "rewards/accuracies": 0.5078125, + "rewards/chosen": -1.5684573650360107, + "rewards/margins": 0.06385258585214615, + "rewards/rejected": -1.6323097944259644, + "step": 47 + }, + { + "epoch": 0.10148662041625371, + "grad_norm": 0.2597256600856781, + "learning_rate": 1e-06, + "logits/chosen": -0.4355677664279938, + "logits/rejected": -0.38983187079429626, + "logps/chosen": -0.8787693977355957, + "logps/rejected": -0.9383041262626648, + "loss": 1.35, + "rewards/accuracies": 0.5546875, + "rewards/chosen": -1.7575387954711914, + "rewards/margins": 0.11906948685646057, + "rewards/rejected": -1.8766082525253296, + "step": 48 + }, + { + "epoch": 0.103600925008259, + "grad_norm": 0.9942799210548401, + "learning_rate": 9.999862751990697e-07, + "logits/chosen": -0.4244321882724762, + "logits/rejected": -0.4366786777973175, + "logps/chosen": -0.7910157442092896, + "logps/rejected": -0.8630884885787964, + "loss": 1.3166, + "rewards/accuracies": 0.5546875, + "rewards/chosen": -1.582031488418579, + "rewards/margins": 0.14414538443088531, + "rewards/rejected": -1.7261769771575928, + "step": 49 + }, + { + "epoch": 0.10571522960026429, + "grad_norm": 0.5333903431892395, + "learning_rate": 9.999451015497595e-07, + "logits/chosen": -0.389942467212677, + "logits/rejected": -0.36674585938453674, + "logps/chosen": -0.7312074899673462, + "logps/rejected": -0.7289648652076721, + "loss": 1.4225, + "rewards/accuracies": 0.4765625, + "rewards/chosen": -1.4624149799346924, + "rewards/margins": -0.004485193639993668, + "rewards/rejected": -1.4579297304153442, + "step": 50 + }, + { + "epoch": 0.10782953419226958, + "grad_norm": 0.5712242722511292, + "learning_rate": 9.9987648131247e-07, + "logits/chosen": -0.4622853994369507, + "logits/rejected": -0.3728552460670471, + "logps/chosen": -0.8764299750328064, + "logps/rejected": -0.869678795337677, + "loss": 1.4542, + "rewards/accuracies": 0.53125, + "rewards/chosen": -1.7528599500656128, + "rewards/margins": -0.013502337038516998, + "rewards/rejected": -1.739357590675354, + "step": 51 + }, + { + "epoch": 0.10994383878427486, + "grad_norm": 0.2586441934108734, + "learning_rate": 9.99780418254397e-07, + "logits/chosen": -0.37249019742012024, + "logits/rejected": -0.3998304605484009, + "logps/chosen": -0.8435611724853516, + "logps/rejected": -0.9359882473945618, + "loss": 1.3057, + "rewards/accuracies": 0.625, + "rewards/chosen": -1.6871223449707031, + "rewards/margins": 0.18485431373119354, + "rewards/rejected": -1.8719764947891235, + "step": 52 + }, + { + "epoch": 0.11205814337628014, + "grad_norm": 1.0829113721847534, + "learning_rate": 9.996569176493268e-07, + "logits/chosen": -0.47697725892066956, + "logits/rejected": -0.4208195209503174, + "logps/chosen": -0.8014968037605286, + "logps/rejected": -0.8703804612159729, + "loss": 1.3523, + "rewards/accuracies": 0.5703125, + "rewards/chosen": -1.6029936075210571, + "rewards/margins": 0.1377674788236618, + "rewards/rejected": -1.7407609224319458, + "step": 53 + }, + { + "epoch": 0.11417244796828543, + "grad_norm": 0.5523208379745483, + "learning_rate": 9.995059862773438e-07, + "logits/chosen": -0.40533363819122314, + "logits/rejected": -0.36801978945732117, + "logps/chosen": -0.7641825675964355, + "logps/rejected": -0.8168596029281616, + "loss": 1.3692, + "rewards/accuracies": 0.53125, + "rewards/chosen": -1.528365135192871, + "rewards/margins": 0.10535416752099991, + "rewards/rejected": -1.6337192058563232, + "step": 54 + }, + { + "epoch": 0.11628675256029072, + "grad_norm": 0.614101767539978, + "learning_rate": 9.993276324244605e-07, + "logits/chosen": -0.4476906955242157, + "logits/rejected": -0.40396648645401, + "logps/chosen": -0.8706808090209961, + "logps/rejected": -0.9221430420875549, + "loss": 1.3787, + "rewards/accuracies": 0.515625, + "rewards/chosen": -1.7413616180419922, + "rewards/margins": 0.10292442888021469, + "rewards/rejected": -1.8442860841751099, + "step": 55 + }, + { + "epoch": 0.118401057152296, + "grad_norm": 0.3428778052330017, + "learning_rate": 9.991218658821608e-07, + "logits/chosen": -0.31709593534469604, + "logits/rejected": -0.2760937213897705, + "logps/chosen": -0.842248797416687, + "logps/rejected": -0.8068034648895264, + "loss": 1.498, + "rewards/accuracies": 0.4375, + "rewards/chosen": -1.684497594833374, + "rewards/margins": -0.07089066505432129, + "rewards/rejected": -1.6136069297790527, + "step": 56 + }, + { + "epoch": 0.12051536174430129, + "grad_norm": 0.6877723932266235, + "learning_rate": 9.988886979468643e-07, + "logits/chosen": -0.41800016164779663, + "logits/rejected": -0.4011584222316742, + "logps/chosen": -0.7845420837402344, + "logps/rejected": -0.834447979927063, + "loss": 1.3491, + "rewards/accuracies": 0.546875, + "rewards/chosen": -1.5690841674804688, + "rewards/margins": 0.09981165081262589, + "rewards/rejected": -1.668895959854126, + "step": 57 + }, + { + "epoch": 0.12262966633630658, + "grad_norm": 0.9649701714515686, + "learning_rate": 9.98628141419305e-07, + "logits/chosen": -0.4253537058830261, + "logits/rejected": -0.4305458962917328, + "logps/chosen": -0.86476731300354, + "logps/rejected": -0.9080386161804199, + "loss": 1.3639, + "rewards/accuracies": 0.515625, + "rewards/chosen": -1.72953462600708, + "rewards/margins": 0.08654248714447021, + "rewards/rejected": -1.8160772323608398, + "step": 58 + }, + { + "epoch": 0.12474397092831185, + "grad_norm": 1.3779780864715576, + "learning_rate": 9.98340210603829e-07, + "logits/chosen": -0.39970022439956665, + "logits/rejected": -0.441428005695343, + "logps/chosen": -0.8662775158882141, + "logps/rejected": -0.9646260738372803, + "loss": 1.3001, + "rewards/accuracies": 0.6484375, + "rewards/chosen": -1.7325550317764282, + "rewards/margins": 0.19669723510742188, + "rewards/rejected": -1.9292521476745605, + "step": 59 + }, + { + "epoch": 0.12685827552031714, + "grad_norm": 0.5366966724395752, + "learning_rate": 9.980249213076084e-07, + "logits/chosen": -0.37770116329193115, + "logits/rejected": -0.35231757164001465, + "logps/chosen": -0.8165755867958069, + "logps/rejected": -0.8619179129600525, + "loss": 1.3699, + "rewards/accuracies": 0.578125, + "rewards/chosen": -1.6331511735916138, + "rewards/margins": 0.09068439900875092, + "rewards/rejected": -1.723835825920105, + "step": 60 + }, + { + "epoch": 0.12897258011232243, + "grad_norm": 0.36810922622680664, + "learning_rate": 9.976822908397748e-07, + "logits/chosen": -0.4224976897239685, + "logits/rejected": -0.41758257150650024, + "logps/chosen": -0.8445641994476318, + "logps/rejected": -0.9393664598464966, + "loss": 1.3193, + "rewards/accuracies": 0.5390625, + "rewards/chosen": -1.6891283988952637, + "rewards/margins": 0.18960458040237427, + "rewards/rejected": -1.8787329196929932, + "step": 61 + }, + { + "epoch": 0.13108688470432772, + "grad_norm": 0.6838279366493225, + "learning_rate": 9.97312338010468e-07, + "logits/chosen": -0.4168627858161926, + "logits/rejected": -0.36115381121635437, + "logps/chosen": -0.8370552659034729, + "logps/rejected": -0.8352169394493103, + "loss": 1.4284, + "rewards/accuracies": 0.5703125, + "rewards/chosen": -1.6741105318069458, + "rewards/margins": -0.0036766715347766876, + "rewards/rejected": -1.6704338788986206, + "step": 62 + }, + { + "epoch": 0.133201189296333, + "grad_norm": 0.39330533146858215, + "learning_rate": 9.969150831298037e-07, + "logits/chosen": -0.4558233618736267, + "logits/rejected": -0.4025765061378479, + "logps/chosen": -0.826255738735199, + "logps/rejected": -0.894213080406189, + "loss": 1.3485, + "rewards/accuracies": 0.5, + "rewards/chosen": -1.652511477470398, + "rewards/margins": 0.13591471314430237, + "rewards/rejected": -1.788426160812378, + "step": 63 + }, + { + "epoch": 0.1353154938883383, + "grad_norm": 0.6055929660797119, + "learning_rate": 9.964905480067584e-07, + "logits/chosen": -0.459463506937027, + "logits/rejected": -0.42943331599235535, + "logps/chosen": -0.7901928424835205, + "logps/rejected": -0.7964221239089966, + "loss": 1.4057, + "rewards/accuracies": 0.484375, + "rewards/chosen": -1.580385684967041, + "rewards/margins": 0.012458762153983116, + "rewards/rejected": -1.5928442478179932, + "step": 64 + }, + { + "epoch": 0.13742979848034356, + "grad_norm": 0.37883859872817993, + "learning_rate": 9.960387559479725e-07, + "logits/chosen": -0.4447207450866699, + "logits/rejected": -0.371269553899765, + "logps/chosen": -0.7863065004348755, + "logps/rejected": -0.7983666658401489, + "loss": 1.4202, + "rewards/accuracies": 0.4609375, + "rewards/chosen": -1.572613000869751, + "rewards/margins": 0.02412020042538643, + "rewards/rejected": -1.5967333316802979, + "step": 65 + }, + { + "epoch": 0.13954410307234885, + "grad_norm": 0.31330156326293945, + "learning_rate": 9.955597317564703e-07, + "logits/chosen": -0.42059677839279175, + "logits/rejected": -0.37605100870132446, + "logps/chosen": -0.7669360637664795, + "logps/rejected": -0.8348797559738159, + "loss": 1.3368, + "rewards/accuracies": 0.5390625, + "rewards/chosen": -1.533872127532959, + "rewards/margins": 0.13588732481002808, + "rewards/rejected": -1.6697595119476318, + "step": 66 + }, + { + "epoch": 0.14165840766435414, + "grad_norm": 0.4353170096874237, + "learning_rate": 9.950535017302983e-07, + "logits/chosen": -0.3897082805633545, + "logits/rejected": -0.38229796290397644, + "logps/chosen": -0.7249190807342529, + "logps/rejected": -0.7696882486343384, + "loss": 1.3511, + "rewards/accuracies": 0.5546875, + "rewards/chosen": -1.4498381614685059, + "rewards/margins": 0.08953814208507538, + "rewards/rejected": -1.5393764972686768, + "step": 67 + }, + { + "epoch": 0.14377271225635943, + "grad_norm": 0.6724106669425964, + "learning_rate": 9.94520093661082e-07, + "logits/chosen": -0.3687596023082733, + "logits/rejected": -0.34222811460494995, + "logps/chosen": -0.7845972776412964, + "logps/rejected": -0.8308086395263672, + "loss": 1.3737, + "rewards/accuracies": 0.5, + "rewards/chosen": -1.5691945552825928, + "rewards/margins": 0.09242270141839981, + "rewards/rejected": -1.6616172790527344, + "step": 68 + }, + { + "epoch": 0.14588701684836472, + "grad_norm": 0.7312172651290894, + "learning_rate": 9.939595368324994e-07, + "logits/chosen": -0.4475817382335663, + "logits/rejected": -0.3975730538368225, + "logps/chosen": -0.7314785718917847, + "logps/rejected": -0.7924487590789795, + "loss": 1.3439, + "rewards/accuracies": 0.5390625, + "rewards/chosen": -1.4629571437835693, + "rewards/margins": 0.12194043397903442, + "rewards/rejected": -1.584897518157959, + "step": 69 + }, + { + "epoch": 0.14800132144037, + "grad_norm": 0.301097571849823, + "learning_rate": 9.933718620186744e-07, + "logits/chosen": -0.402032732963562, + "logits/rejected": -0.3640722632408142, + "logps/chosen": -0.7727882862091064, + "logps/rejected": -0.8291516304016113, + "loss": 1.358, + "rewards/accuracies": 0.5546875, + "rewards/chosen": -1.545576572418213, + "rewards/margins": 0.11272668838500977, + "rewards/rejected": -1.6583032608032227, + "step": 70 + }, + { + "epoch": 0.1501156260323753, + "grad_norm": 0.377835750579834, + "learning_rate": 9.92757101482486e-07, + "logits/chosen": -0.316825270652771, + "logits/rejected": -0.3245603144168854, + "logps/chosen": -0.7962774634361267, + "logps/rejected": -0.8610175848007202, + "loss": 1.3464, + "rewards/accuracies": 0.5859375, + "rewards/chosen": -1.5925549268722534, + "rewards/margins": 0.12948019802570343, + "rewards/rejected": -1.7220351696014404, + "step": 71 + }, + { + "epoch": 0.1522299306243806, + "grad_norm": 0.84058678150177, + "learning_rate": 9.921152889737984e-07, + "logits/chosen": -0.4446060359477997, + "logits/rejected": -0.43160340189933777, + "logps/chosen": -0.7745426297187805, + "logps/rejected": -0.8286185264587402, + "loss": 1.3408, + "rewards/accuracies": 0.5703125, + "rewards/chosen": -1.549085259437561, + "rewards/margins": 0.10815180093050003, + "rewards/rejected": -1.6572370529174805, + "step": 72 + }, + { + "epoch": 0.15434423521638585, + "grad_norm": 0.6970808506011963, + "learning_rate": 9.91446459727607e-07, + "logits/chosen": -0.4220297634601593, + "logits/rejected": -0.404453307390213, + "logps/chosen": -0.8769615888595581, + "logps/rejected": -0.9564313292503357, + "loss": 1.3423, + "rewards/accuracies": 0.6328125, + "rewards/chosen": -1.7539231777191162, + "rewards/margins": 0.15893957018852234, + "rewards/rejected": -1.9128626585006714, + "step": 73 + }, + { + "epoch": 0.15645853980839114, + "grad_norm": 0.5900676250457764, + "learning_rate": 9.90750650462105e-07, + "logits/chosen": -0.41884100437164307, + "logits/rejected": -0.38551777601242065, + "logps/chosen": -0.814996063709259, + "logps/rejected": -0.8892688751220703, + "loss": 1.3325, + "rewards/accuracies": 0.5546875, + "rewards/chosen": -1.629992127418518, + "rewards/margins": 0.14854571223258972, + "rewards/rejected": -1.7785377502441406, + "step": 74 + }, + { + "epoch": 0.15857284440039643, + "grad_norm": 0.7245749831199646, + "learning_rate": 9.900278993766668e-07, + "logits/chosen": -0.3451727330684662, + "logits/rejected": -0.3348972201347351, + "logps/chosen": -0.8788102865219116, + "logps/rejected": -0.9273182153701782, + "loss": 1.377, + "rewards/accuracies": 0.5078125, + "rewards/chosen": -1.7576205730438232, + "rewards/margins": 0.09701582789421082, + "rewards/rejected": -1.8546364307403564, + "step": 75 + }, + { + "epoch": 0.16068714899240172, + "grad_norm": 0.3135383725166321, + "learning_rate": 9.89278246149752e-07, + "logits/chosen": -0.4140404760837555, + "logits/rejected": -0.38082340359687805, + "logps/chosen": -0.7513999342918396, + "logps/rejected": -0.7880118489265442, + "loss": 1.3921, + "rewards/accuracies": 0.53125, + "rewards/chosen": -1.5027998685836792, + "rewards/margins": 0.07322371751070023, + "rewards/rejected": -1.5760236978530884, + "step": 76 + }, + { + "epoch": 0.162801453584407, + "grad_norm": 0.8594076633453369, + "learning_rate": 9.885017319367252e-07, + "logits/chosen": -0.35951656103134155, + "logits/rejected": -0.30456626415252686, + "logps/chosen": -0.7989844679832458, + "logps/rejected": -0.8145395517349243, + "loss": 1.4061, + "rewards/accuracies": 0.4765625, + "rewards/chosen": -1.5979689359664917, + "rewards/margins": 0.031110182404518127, + "rewards/rejected": -1.6290791034698486, + "step": 77 + }, + { + "epoch": 0.1649157581764123, + "grad_norm": 0.4393538534641266, + "learning_rate": 9.876983993675989e-07, + "logits/chosen": -0.33119240403175354, + "logits/rejected": -0.31970253586769104, + "logps/chosen": -0.721772313117981, + "logps/rejected": -0.7868390679359436, + "loss": 1.3325, + "rewards/accuracies": 0.5390625, + "rewards/chosen": -1.443544626235962, + "rewards/margins": 0.13013358414173126, + "rewards/rejected": -1.5736781358718872, + "step": 78 + }, + { + "epoch": 0.1670300627684176, + "grad_norm": 0.8017925024032593, + "learning_rate": 9.868682925446909e-07, + "logits/chosen": -0.3608989417552948, + "logits/rejected": -0.3497124910354614, + "logps/chosen": -0.8117240071296692, + "logps/rejected": -0.8506529331207275, + "loss": 1.3671, + "rewards/accuracies": 0.53125, + "rewards/chosen": -1.6234480142593384, + "rewards/margins": 0.07785768806934357, + "rewards/rejected": -1.701305866241455, + "step": 79 + }, + { + "epoch": 0.16914436736042285, + "grad_norm": 0.5704994201660156, + "learning_rate": 9.860114570402054e-07, + "logits/chosen": -0.37441548705101013, + "logits/rejected": -0.32188406586647034, + "logps/chosen": -0.745419442653656, + "logps/rejected": -0.816170871257782, + "loss": 1.3315, + "rewards/accuracies": 0.484375, + "rewards/chosen": -1.490838885307312, + "rewards/margins": 0.1415030062198639, + "rewards/rejected": -1.632341742515564, + "step": 80 + }, + { + "epoch": 0.17125867195242814, + "grad_norm": 3.0005106925964355, + "learning_rate": 9.85127939893729e-07, + "logits/chosen": -0.34791454672813416, + "logits/rejected": -0.32542383670806885, + "logps/chosen": -0.7547991275787354, + "logps/rejected": -0.7868378162384033, + "loss": 1.3742, + "rewards/accuracies": 0.515625, + "rewards/chosen": -1.5095982551574707, + "rewards/margins": 0.06407731771469116, + "rewards/rejected": -1.5736756324768066, + "step": 81 + }, + { + "epoch": 0.17337297654443343, + "grad_norm": 0.5891271829605103, + "learning_rate": 9.842177896096493e-07, + "logits/chosen": -0.38649702072143555, + "logits/rejected": -0.36892226338386536, + "logps/chosen": -0.7556143999099731, + "logps/rejected": -0.82858806848526, + "loss": 1.3233, + "rewards/accuracies": 0.5703125, + "rewards/chosen": -1.5112287998199463, + "rewards/margins": 0.14594702422618866, + "rewards/rejected": -1.65717613697052, + "step": 82 + }, + { + "epoch": 0.17548728113643872, + "grad_norm": 0.27861157059669495, + "learning_rate": 9.832810561544923e-07, + "logits/chosen": -0.38264670968055725, + "logits/rejected": -0.35908499360084534, + "logps/chosen": -0.7858557105064392, + "logps/rejected": -0.8571599721908569, + "loss": 1.3234, + "rewards/accuracies": 0.578125, + "rewards/chosen": -1.5717114210128784, + "rewards/margins": 0.1426086127758026, + "rewards/rejected": -1.7143199443817139, + "step": 83 + }, + { + "epoch": 0.177601585728444, + "grad_norm": 0.8158763647079468, + "learning_rate": 9.823177909541793e-07, + "logits/chosen": -0.4076104760169983, + "logits/rejected": -0.3934200704097748, + "logps/chosen": -0.8089872002601624, + "logps/rejected": -0.8885407447814941, + "loss": 1.3476, + "rewards/accuracies": 0.5390625, + "rewards/chosen": -1.6179744005203247, + "rewards/margins": 0.15910708904266357, + "rewards/rejected": -1.7770814895629883, + "step": 84 + }, + { + "epoch": 0.1797158903204493, + "grad_norm": 0.46008333563804626, + "learning_rate": 9.813280468912022e-07, + "logits/chosen": -0.33124151825904846, + "logits/rejected": -0.34535717964172363, + "logps/chosen": -0.733020544052124, + "logps/rejected": -0.8716557621955872, + "loss": 1.2807, + "rewards/accuracies": 0.5859375, + "rewards/chosen": -1.466041088104248, + "rewards/margins": 0.27727028727531433, + "rewards/rejected": -1.7433115243911743, + "step": 85 + }, + { + "epoch": 0.18183019491245458, + "grad_norm": 0.3784334659576416, + "learning_rate": 9.80311878301722e-07, + "logits/chosen": -0.40713849663734436, + "logits/rejected": -0.3808574080467224, + "logps/chosen": -0.7063947319984436, + "logps/rejected": -0.7589148879051208, + "loss": 1.3501, + "rewards/accuracies": 0.515625, + "rewards/chosen": -1.4127894639968872, + "rewards/margins": 0.10504024475812912, + "rewards/rejected": -1.5178297758102417, + "step": 86 + }, + { + "epoch": 0.18394449950445987, + "grad_norm": 0.6137813329696655, + "learning_rate": 9.792693409725853e-07, + "logits/chosen": -0.4119255542755127, + "logits/rejected": -0.44221603870391846, + "logps/chosen": -0.795850932598114, + "logps/rejected": -0.8925026059150696, + "loss": 1.2987, + "rewards/accuracies": 0.5859375, + "rewards/chosen": -1.591701865196228, + "rewards/margins": 0.19330324232578278, + "rewards/rejected": -1.7850052118301392, + "step": 87 + }, + { + "epoch": 0.18605880409646514, + "grad_norm": 0.5354250073432922, + "learning_rate": 9.78200492138261e-07, + "logits/chosen": -0.3792279064655304, + "logits/rejected": -0.3789527714252472, + "logps/chosen": -0.7249161005020142, + "logps/rejected": -0.8088154792785645, + "loss": 1.3304, + "rewards/accuracies": 0.515625, + "rewards/chosen": -1.4498322010040283, + "rewards/margins": 0.16779886186122894, + "rewards/rejected": -1.617630958557129, + "step": 88 + }, + { + "epoch": 0.18817310868847043, + "grad_norm": 0.6911243796348572, + "learning_rate": 9.771053904776995e-07, + "logits/chosen": -0.38837429881095886, + "logits/rejected": -0.36597418785095215, + "logps/chosen": -0.7528612017631531, + "logps/rejected": -0.7981135249137878, + "loss": 1.3481, + "rewards/accuracies": 0.6171875, + "rewards/chosen": -1.5057224035263062, + "rewards/margins": 0.09050464630126953, + "rewards/rejected": -1.5962270498275757, + "step": 89 + }, + { + "epoch": 0.19028741328047571, + "grad_norm": 0.37110790610313416, + "learning_rate": 9.759840961111097e-07, + "logits/chosen": -0.3804919421672821, + "logits/rejected": -0.38750600814819336, + "logps/chosen": -0.8673248291015625, + "logps/rejected": -0.9381619691848755, + "loss": 1.3303, + "rewards/accuracies": 0.5859375, + "rewards/chosen": -1.734649658203125, + "rewards/margins": 0.14167429506778717, + "rewards/rejected": -1.876323938369751, + "step": 90 + }, + { + "epoch": 0.192401717872481, + "grad_norm": 0.8033086657524109, + "learning_rate": 9.748366705966593e-07, + "logits/chosen": -0.3804866075515747, + "logits/rejected": -0.31055447459220886, + "logps/chosen": -0.7535511255264282, + "logps/rejected": -0.7824290990829468, + "loss": 1.3706, + "rewards/accuracies": 0.546875, + "rewards/chosen": -1.5071022510528564, + "rewards/margins": 0.057755980640649796, + "rewards/rejected": -1.5648581981658936, + "step": 91 + }, + { + "epoch": 0.1945160224644863, + "grad_norm": 0.6934167742729187, + "learning_rate": 9.736631769270957e-07, + "logits/chosen": -0.443461149930954, + "logits/rejected": -0.4398806691169739, + "logps/chosen": -0.8123858571052551, + "logps/rejected": -0.8972252607345581, + "loss": 1.3464, + "rewards/accuracies": 0.578125, + "rewards/chosen": -1.6247717142105103, + "rewards/margins": 0.16967862844467163, + "rewards/rejected": -1.7944505214691162, + "step": 92 + }, + { + "epoch": 0.19663032705649158, + "grad_norm": 1.140067458152771, + "learning_rate": 9.724636795262866e-07, + "logits/chosen": -0.43793433904647827, + "logits/rejected": -0.4402340352535248, + "logps/chosen": -0.8155819177627563, + "logps/rejected": -0.8659977912902832, + "loss": 1.3621, + "rewards/accuracies": 0.5, + "rewards/chosen": -1.6311638355255127, + "rewards/margins": 0.10083187371492386, + "rewards/rejected": -1.7319955825805664, + "step": 93 + }, + { + "epoch": 0.19874463164849687, + "grad_norm": 1.8303897380828857, + "learning_rate": 9.712382442456844e-07, + "logits/chosen": -0.34288379549980164, + "logits/rejected": -0.36632782220840454, + "logps/chosen": -0.7338054776191711, + "logps/rejected": -0.8537961840629578, + "loss": 1.2942, + "rewards/accuracies": 0.5859375, + "rewards/chosen": -1.4676109552383423, + "rewards/margins": 0.23998141288757324, + "rewards/rejected": -1.7075923681259155, + "step": 94 + }, + { + "epoch": 0.20085893624050213, + "grad_norm": 0.34392252564430237, + "learning_rate": 9.6998693836071e-07, + "logits/chosen": -0.4381723999977112, + "logits/rejected": -0.4031081199645996, + "logps/chosen": -0.7130292057991028, + "logps/rejected": -0.7402217388153076, + "loss": 1.3766, + "rewards/accuracies": 0.5859375, + "rewards/chosen": -1.4260584115982056, + "rewards/margins": 0.0543849840760231, + "rewards/rejected": -1.4804434776306152, + "step": 95 + }, + { + "epoch": 0.20297324083250742, + "grad_norm": 0.4129842519760132, + "learning_rate": 9.687098305670604e-07, + "logits/chosen": -0.39796924591064453, + "logits/rejected": -0.3476859927177429, + "logps/chosen": -0.7520885467529297, + "logps/rejected": -0.8058558702468872, + "loss": 1.3663, + "rewards/accuracies": 0.53125, + "rewards/chosen": -1.5041770935058594, + "rewards/margins": 0.10753461718559265, + "rewards/rejected": -1.6117117404937744, + "step": 96 + }, + { + "epoch": 0.2050875454245127, + "grad_norm": 0.5054985284805298, + "learning_rate": 9.674069909769362e-07, + "logits/chosen": -0.3942393660545349, + "logits/rejected": -0.3627544045448303, + "logps/chosen": -0.733702540397644, + "logps/rejected": -0.781308650970459, + "loss": 1.342, + "rewards/accuracies": 0.5859375, + "rewards/chosen": -1.467405080795288, + "rewards/margins": 0.09521210938692093, + "rewards/rejected": -1.562617301940918, + "step": 97 + }, + { + "epoch": 0.207201850016518, + "grad_norm": 0.6975870728492737, + "learning_rate": 9.66078491115194e-07, + "logits/chosen": -0.38557127118110657, + "logits/rejected": -0.3581204414367676, + "logps/chosen": -0.7359838485717773, + "logps/rejected": -0.7648134827613831, + "loss": 1.3841, + "rewards/accuracies": 0.5234375, + "rewards/chosen": -1.4719676971435547, + "rewards/margins": 0.057659298181533813, + "rewards/rejected": -1.5296269655227661, + "step": 98 + }, + { + "epoch": 0.2093161546085233, + "grad_norm": 0.7575029730796814, + "learning_rate": 9.647244039154177e-07, + "logits/chosen": -0.3871467411518097, + "logits/rejected": -0.3941374123096466, + "logps/chosen": -0.6516871452331543, + "logps/rejected": -0.7066073417663574, + "loss": 1.3364, + "rewards/accuracies": 0.5390625, + "rewards/chosen": -1.3033742904663086, + "rewards/margins": 0.10984040796756744, + "rewards/rejected": -1.4132146835327148, + "step": 99 + }, + { + "epoch": 0.21143045920052858, + "grad_norm": 1.3344250917434692, + "learning_rate": 9.633448037159166e-07, + "logits/chosen": -0.40887755155563354, + "logits/rejected": -0.41733911633491516, + "logps/chosen": -0.6978950500488281, + "logps/rejected": -0.793424129486084, + "loss": 1.3076, + "rewards/accuracies": 0.609375, + "rewards/chosen": -1.3957901000976562, + "rewards/margins": 0.1910584717988968, + "rewards/rejected": -1.586848258972168, + "step": 100 + }, + { + "epoch": 0.21354476379253387, + "grad_norm": 0.8798456788063049, + "learning_rate": 9.619397662556433e-07, + "logits/chosen": -0.302534282207489, + "logits/rejected": -0.29954588413238525, + "logps/chosen": -0.719552755355835, + "logps/rejected": -0.7628123164176941, + "loss": 1.3699, + "rewards/accuracies": 0.5390625, + "rewards/chosen": -1.43910551071167, + "rewards/margins": 0.08651915192604065, + "rewards/rejected": -1.5256246328353882, + "step": 101 + }, + { + "epoch": 0.21565906838453916, + "grad_norm": 0.8746365308761597, + "learning_rate": 9.605093686700353e-07, + "logits/chosen": -0.372263640165329, + "logits/rejected": -0.3714321255683899, + "logps/chosen": -0.6665956974029541, + "logps/rejected": -0.7361368536949158, + "loss": 1.3173, + "rewards/accuracies": 0.5546875, + "rewards/chosen": -1.3331913948059082, + "rewards/margins": 0.1390824019908905, + "rewards/rejected": -1.4722737073898315, + "step": 102 + }, + { + "epoch": 0.21777337297654442, + "grad_norm": 0.8450930714607239, + "learning_rate": 9.590536894867812e-07, + "logits/chosen": -0.37228280305862427, + "logits/rejected": -0.37763556838035583, + "logps/chosen": -0.7425979375839233, + "logps/rejected": -0.7557005882263184, + "loss": 1.4085, + "rewards/accuracies": 0.5390625, + "rewards/chosen": -1.4851958751678467, + "rewards/margins": 0.026205357164144516, + "rewards/rejected": -1.5114011764526367, + "step": 103 + }, + { + "epoch": 0.2198876775685497, + "grad_norm": 0.5075035691261292, + "learning_rate": 9.575728086215091e-07, + "logits/chosen": -0.4433964788913727, + "logits/rejected": -0.3782787024974823, + "logps/chosen": -0.7308244109153748, + "logps/rejected": -0.8043883442878723, + "loss": 1.3353, + "rewards/accuracies": 0.5703125, + "rewards/chosen": -1.4616488218307495, + "rewards/margins": 0.14712783694267273, + "rewards/rejected": -1.6087766885757446, + "step": 104 + }, + { + "epoch": 0.222001982160555, + "grad_norm": 1.0270946025848389, + "learning_rate": 9.560668073733993e-07, + "logits/chosen": -0.3593980073928833, + "logits/rejected": -0.3159312903881073, + "logps/chosen": -0.757469892501831, + "logps/rejected": -0.8256179094314575, + "loss": 1.3289, + "rewards/accuracies": 0.5546875, + "rewards/chosen": -1.514939785003662, + "rewards/margins": 0.13629598915576935, + "rewards/rejected": -1.651235818862915, + "step": 105 + }, + { + "epoch": 0.2241162867525603, + "grad_norm": 0.8390078544616699, + "learning_rate": 9.54535768420721e-07, + "logits/chosen": -0.3266332149505615, + "logits/rejected": -0.3008713722229004, + "logps/chosen": -0.7286102771759033, + "logps/rejected": -0.7803273797035217, + "loss": 1.3593, + "rewards/accuracies": 0.5078125, + "rewards/chosen": -1.4572205543518066, + "rewards/margins": 0.10343428701162338, + "rewards/rejected": -1.5606547594070435, + "step": 106 + }, + { + "epoch": 0.22623059134456558, + "grad_norm": 1.130595088005066, + "learning_rate": 9.529797758162934e-07, + "logits/chosen": -0.36109817028045654, + "logits/rejected": -0.34797021746635437, + "logps/chosen": -0.7723361253738403, + "logps/rejected": -0.8873662352561951, + "loss": 1.2956, + "rewards/accuracies": 0.625, + "rewards/chosen": -1.5446722507476807, + "rewards/margins": 0.23006024956703186, + "rewards/rejected": -1.7747324705123901, + "step": 107 + }, + { + "epoch": 0.22834489593657087, + "grad_norm": 0.9399718642234802, + "learning_rate": 9.513989149828717e-07, + "logits/chosen": -0.3596777021884918, + "logits/rejected": -0.3660539388656616, + "logps/chosen": -0.7130635976791382, + "logps/rejected": -0.7378955483436584, + "loss": 1.3774, + "rewards/accuracies": 0.4765625, + "rewards/chosen": -1.4261271953582764, + "rewards/margins": 0.04966379329562187, + "rewards/rejected": -1.475791096687317, + "step": 108 + }, + { + "epoch": 0.23045920052857616, + "grad_norm": 1.097594976425171, + "learning_rate": 9.49793272708457e-07, + "logits/chosen": -0.31783169507980347, + "logits/rejected": -0.3008044362068176, + "logps/chosen": -0.6933202147483826, + "logps/rejected": -0.7510000467300415, + "loss": 1.3498, + "rewards/accuracies": 0.5390625, + "rewards/chosen": -1.3866404294967651, + "rewards/margins": 0.1153596043586731, + "rewards/rejected": -1.502000093460083, + "step": 109 + }, + { + "epoch": 0.23257350512058145, + "grad_norm": 1.096330165863037, + "learning_rate": 9.481629371415313e-07, + "logits/chosen": -0.3582899570465088, + "logits/rejected": -0.3120020031929016, + "logps/chosen": -0.817268431186676, + "logps/rejected": -0.8862374424934387, + "loss": 1.3349, + "rewards/accuracies": 0.5625, + "rewards/chosen": -1.634536862373352, + "rewards/margins": 0.13793781399726868, + "rewards/rejected": -1.7724748849868774, + "step": 110 + }, + { + "epoch": 0.2346878097125867, + "grad_norm": 0.8261978626251221, + "learning_rate": 9.465079977862192e-07, + "logits/chosen": -0.41336673498153687, + "logits/rejected": -0.39544352889060974, + "logps/chosen": -0.7673372030258179, + "logps/rejected": -0.8331737518310547, + "loss": 1.3373, + "rewards/accuracies": 0.59375, + "rewards/chosen": -1.5346744060516357, + "rewards/margins": 0.1316729635000229, + "rewards/rejected": -1.6663475036621094, + "step": 111 + }, + { + "epoch": 0.236802114304592, + "grad_norm": 0.5922806262969971, + "learning_rate": 9.448285454973737e-07, + "logits/chosen": -0.3224758207798004, + "logits/rejected": -0.3118049204349518, + "logps/chosen": -0.7584627866744995, + "logps/rejected": -0.8859898447990417, + "loss": 1.2731, + "rewards/accuracies": 0.625, + "rewards/chosen": -1.516925573348999, + "rewards/margins": 0.2550540566444397, + "rewards/rejected": -1.7719796895980835, + "step": 112 + }, + { + "epoch": 0.2389164188965973, + "grad_norm": 1.3172541856765747, + "learning_rate": 9.431246724755877e-07, + "logits/chosen": -0.4287208318710327, + "logits/rejected": -0.3984590172767639, + "logps/chosen": -0.7587048411369324, + "logps/rejected": -0.7860502004623413, + "loss": 1.3832, + "rewards/accuracies": 0.4765625, + "rewards/chosen": -1.5174096822738647, + "rewards/margins": 0.05469079315662384, + "rewards/rejected": -1.5721004009246826, + "step": 113 + }, + { + "epoch": 0.24103072348860258, + "grad_norm": 0.7749882340431213, + "learning_rate": 9.413964722621337e-07, + "logits/chosen": -0.39085906744003296, + "logits/rejected": -0.3316206932067871, + "logps/chosen": -0.7035898566246033, + "logps/rejected": -0.7375759482383728, + "loss": 1.387, + "rewards/accuracies": 0.5546875, + "rewards/chosen": -1.4071797132492065, + "rewards/margins": 0.06797221302986145, + "rewards/rejected": -1.4751518964767456, + "step": 114 + }, + { + "epoch": 0.24314502808060787, + "grad_norm": 1.0914056301116943, + "learning_rate": 9.396440397338272e-07, + "logits/chosen": -0.38826486468315125, + "logits/rejected": -0.35520774126052856, + "logps/chosen": -0.7385872602462769, + "logps/rejected": -0.7974889278411865, + "loss": 1.3477, + "rewards/accuracies": 0.5, + "rewards/chosen": -1.4771745204925537, + "rewards/margins": 0.11780343949794769, + "rewards/rejected": -1.594977855682373, + "step": 115 + }, + { + "epoch": 0.24525933267261316, + "grad_norm": 1.2966018915176392, + "learning_rate": 9.378674710978183e-07, + "logits/chosen": -0.36493802070617676, + "logits/rejected": -0.34763696789741516, + "logps/chosen": -0.6731826663017273, + "logps/rejected": -0.7645149827003479, + "loss": 1.3, + "rewards/accuracies": 0.5859375, + "rewards/chosen": -1.3463653326034546, + "rewards/margins": 0.18266455829143524, + "rewards/rejected": -1.5290299654006958, + "step": 116 + }, + { + "epoch": 0.24737363726461845, + "grad_norm": 0.49401605129241943, + "learning_rate": 9.360668638863109e-07, + "logits/chosen": -0.40416795015335083, + "logits/rejected": -0.3815993070602417, + "logps/chosen": -0.719497799873352, + "logps/rejected": -0.7588324546813965, + "loss": 1.3621, + "rewards/accuracies": 0.5625, + "rewards/chosen": -1.438995599746704, + "rewards/margins": 0.07866920530796051, + "rewards/rejected": -1.517664909362793, + "step": 117 + }, + { + "epoch": 0.2494879418566237, + "grad_norm": 1.0603238344192505, + "learning_rate": 9.342423169512071e-07, + "logits/chosen": -0.3857055604457855, + "logits/rejected": -0.3524513244628906, + "logps/chosen": -0.7373769283294678, + "logps/rejected": -0.7971038818359375, + "loss": 1.3358, + "rewards/accuracies": 0.59375, + "rewards/chosen": -1.4747538566589355, + "rewards/margins": 0.11945393681526184, + "rewards/rejected": -1.594207763671875, + "step": 118 + }, + { + "epoch": 0.251602246448629, + "grad_norm": 0.9880490303039551, + "learning_rate": 9.323939304586804e-07, + "logits/chosen": -0.31455785036087036, + "logits/rejected": -0.3102484941482544, + "logps/chosen": -0.7276102900505066, + "logps/rejected": -0.7446941137313843, + "loss": 1.3928, + "rewards/accuracies": 0.546875, + "rewards/chosen": -1.4552205801010132, + "rewards/margins": 0.034167706966400146, + "rewards/rejected": -1.4893882274627686, + "step": 119 + }, + { + "epoch": 0.2537165510406343, + "grad_norm": 1.0419566631317139, + "learning_rate": 9.305218058836776e-07, + "logits/chosen": -0.38093918561935425, + "logits/rejected": -0.3588898181915283, + "logps/chosen": -0.715582013130188, + "logps/rejected": -0.8271002769470215, + "loss": 1.2934, + "rewards/accuracies": 0.6015625, + "rewards/chosen": -1.431164026260376, + "rewards/margins": 0.22303667664527893, + "rewards/rejected": -1.654200553894043, + "step": 120 + }, + { + "epoch": 0.2558308556326396, + "grad_norm": 0.657620370388031, + "learning_rate": 9.286260460043473e-07, + "logits/chosen": -0.45690783858299255, + "logits/rejected": -0.4082674980163574, + "logps/chosen": -0.6932571530342102, + "logps/rejected": -0.7631082534790039, + "loss": 1.3398, + "rewards/accuracies": 0.5546875, + "rewards/chosen": -1.3865143060684204, + "rewards/margins": 0.13970226049423218, + "rewards/rejected": -1.5262165069580078, + "step": 121 + }, + { + "epoch": 0.25794516022464486, + "grad_norm": 0.983686089515686, + "learning_rate": 9.267067548963974e-07, + "logits/chosen": -0.40266987681388855, + "logits/rejected": -0.37586671113967896, + "logps/chosen": -0.7362720966339111, + "logps/rejected": -0.7538987398147583, + "loss": 1.4066, + "rewards/accuracies": 0.5390625, + "rewards/chosen": -1.4725441932678223, + "rewards/margins": 0.03525342047214508, + "rewards/rejected": -1.5077974796295166, + "step": 122 + }, + { + "epoch": 0.26005946481665015, + "grad_norm": 1.0076361894607544, + "learning_rate": 9.24764037927381e-07, + "logits/chosen": -0.4461461007595062, + "logits/rejected": -0.40700826048851013, + "logps/chosen": -0.7206646800041199, + "logps/rejected": -0.7489192485809326, + "loss": 1.3759, + "rewards/accuracies": 0.5625, + "rewards/chosen": -1.4413293600082397, + "rewards/margins": 0.05650928616523743, + "rewards/rejected": -1.4978384971618652, + "step": 123 + }, + { + "epoch": 0.26217376940865544, + "grad_norm": 0.933315098285675, + "learning_rate": 9.22798001750913e-07, + "logits/chosen": -0.3966676890850067, + "logits/rejected": -0.3572196960449219, + "logps/chosen": -0.7075096368789673, + "logps/rejected": -0.7406759262084961, + "loss": 1.3667, + "rewards/accuracies": 0.6171875, + "rewards/chosen": -1.4150192737579346, + "rewards/margins": 0.06633266806602478, + "rewards/rejected": -1.4813518524169922, + "step": 124 + }, + { + "epoch": 0.26428807400066073, + "grad_norm": 0.6277392506599426, + "learning_rate": 9.20808754300814e-07, + "logits/chosen": -0.3555490970611572, + "logits/rejected": -0.35786163806915283, + "logps/chosen": -0.7549921274185181, + "logps/rejected": -0.832869291305542, + "loss": 1.3175, + "rewards/accuracies": 0.5546875, + "rewards/chosen": -1.5099842548370361, + "rewards/margins": 0.15575438737869263, + "rewards/rejected": -1.665738582611084, + "step": 125 + }, + { + "epoch": 0.266402378592666, + "grad_norm": 0.7172744274139404, + "learning_rate": 9.18796404785185e-07, + "logits/chosen": -0.41230690479278564, + "logits/rejected": -0.39935630559921265, + "logps/chosen": -0.7129833698272705, + "logps/rejected": -0.7888559103012085, + "loss": 1.3167, + "rewards/accuracies": 0.5625, + "rewards/chosen": -1.425966739654541, + "rewards/margins": 0.15174514055252075, + "rewards/rejected": -1.577711820602417, + "step": 126 + }, + { + "epoch": 0.2685166831846713, + "grad_norm": 1.2629508972167969, + "learning_rate": 9.16761063680412e-07, + "logits/chosen": -0.36754000186920166, + "logits/rejected": -0.3541562259197235, + "logps/chosen": -0.6992133855819702, + "logps/rejected": -0.7668892741203308, + "loss": 1.3735, + "rewards/accuracies": 0.578125, + "rewards/chosen": -1.3984267711639404, + "rewards/margins": 0.13535188138484955, + "rewards/rejected": -1.5337785482406616, + "step": 127 + }, + { + "epoch": 0.2706309877766766, + "grad_norm": 0.7024405598640442, + "learning_rate": 9.147028427251009e-07, + "logits/chosen": -0.4014585018157959, + "logits/rejected": -0.40560898184776306, + "logps/chosen": -0.727234959602356, + "logps/rejected": -0.8070081472396851, + "loss": 1.3138, + "rewards/accuracies": 0.625, + "rewards/chosen": -1.454469919204712, + "rewards/margins": 0.15954652428627014, + "rewards/rejected": -1.6140162944793701, + "step": 128 + } + ], + "logging_steps": 1, + "max_steps": 472, + "num_input_tokens_seen": 0, + "num_train_epochs": 1, + "save_steps": 64, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 0.0, + "train_batch_size": 2, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-192/config.json b/checkpoint-192/config.json new file mode 100644 index 0000000000000000000000000000000000000000..ead276a9e0bbd4a0baae360715d8e80853974eb6 --- /dev/null +++ b/checkpoint-192/config.json @@ -0,0 +1,40 @@ +{ + "_name_or_path": "/home/v-liuzichuan/huggingface/Llama-3.2-3B-Instruct", + "architectures": [ + "LlamaForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "bos_token_id": 128000, + "eos_token_id": [ + 128001, + 128008, + 128009 + ], + "head_dim": 128, + "hidden_act": "silu", + "hidden_size": 3072, + "initializer_range": 0.02, + "intermediate_size": 8192, + "max_position_embeddings": 131072, + "mlp_bias": false, + "model_type": "llama", + "num_attention_heads": 24, + "num_hidden_layers": 28, + "num_key_value_heads": 8, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": { + "factor": 32.0, + "high_freq_factor": 4.0, + "low_freq_factor": 1.0, + "original_max_position_embeddings": 8192, + "rope_type": "llama3" + }, + "rope_theta": 500000.0, + "tie_word_embeddings": true, + "torch_dtype": "bfloat16", + "transformers_version": "4.45.2", + "use_cache": false, + "vocab_size": 128256 +} diff --git a/checkpoint-192/generation_config.json b/checkpoint-192/generation_config.json new file mode 100644 index 0000000000000000000000000000000000000000..b18d16b4cc3c799dd78e854fd824ddf8a2996285 --- /dev/null +++ b/checkpoint-192/generation_config.json @@ -0,0 +1,12 @@ +{ + "bos_token_id": 128000, + "do_sample": true, + "eos_token_id": [ + 128001, + 128008, + 128009 + ], + "temperature": 0.6, + "top_p": 0.9, + "transformers_version": "4.45.2" +} diff --git a/checkpoint-192/model-00001-of-00002.safetensors b/checkpoint-192/model-00001-of-00002.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..35aba8d52634b19b4d5b95bb2dd895ef38db541b --- /dev/null +++ b/checkpoint-192/model-00001-of-00002.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b5b7f49c9ace6c25e70a8b94e4baf1b47e8f1db18367b2a44485bca12d040572 +size 4965799096 diff --git a/checkpoint-192/model-00002-of-00002.safetensors b/checkpoint-192/model-00002-of-00002.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..9c137bc3a06f0dd6ab489a117452af8f9f48fa12 --- /dev/null +++ b/checkpoint-192/model-00002-of-00002.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c57bc460cfdbde7784aa1e7738a391b34b09965d72253011ec3e507ab48e9495 +size 2247734992 diff --git a/checkpoint-192/model.safetensors.index.json b/checkpoint-192/model.safetensors.index.json new file mode 100644 index 0000000000000000000000000000000000000000..ed64de846d720b9a7859dc20575fea8e8ca51940 --- /dev/null +++ b/checkpoint-192/model.safetensors.index.json @@ -0,0 +1,262 @@ +{ + "metadata": { + "total_size": 7213504512 + }, + "weight_map": { + "lm_head.weight": "model-00002-of-00002.safetensors", + "model.embed_tokens.weight": "model-00001-of-00002.safetensors", + "model.layers.0.input_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.0.mlp.down_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.0.mlp.gate_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.0.mlp.up_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.0.post_attention_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.0.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.0.self_attn.o_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.0.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.0.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.1.input_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.1.mlp.down_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.1.mlp.gate_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.1.mlp.up_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.1.post_attention_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.1.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.1.self_attn.o_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.1.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.1.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.10.input_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.10.mlp.down_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.10.mlp.gate_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.10.mlp.up_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.10.post_attention_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.10.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.10.self_attn.o_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.10.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.10.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.11.input_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.11.mlp.down_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.11.mlp.gate_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.11.mlp.up_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.11.post_attention_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.11.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.11.self_attn.o_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.11.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.11.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.12.input_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.12.mlp.down_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.12.mlp.gate_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.12.mlp.up_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.12.post_attention_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.12.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.12.self_attn.o_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.12.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.12.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.13.input_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.13.mlp.down_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.13.mlp.gate_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.13.mlp.up_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.13.post_attention_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.13.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.13.self_attn.o_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.13.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.13.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.14.input_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.14.mlp.down_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.14.mlp.gate_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.14.mlp.up_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.14.post_attention_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.14.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.14.self_attn.o_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.14.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.14.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.15.input_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.15.mlp.down_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.15.mlp.gate_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.15.mlp.up_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.15.post_attention_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.15.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.15.self_attn.o_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.15.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.15.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.16.input_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.16.mlp.down_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.16.mlp.gate_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.16.mlp.up_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.16.post_attention_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.16.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.16.self_attn.o_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.16.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.16.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.17.input_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.17.mlp.down_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.17.mlp.gate_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.17.mlp.up_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.17.post_attention_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.17.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.17.self_attn.o_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.17.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.17.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.18.input_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.18.mlp.down_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.18.mlp.gate_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.18.mlp.up_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.18.post_attention_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.18.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.18.self_attn.o_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.18.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.18.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.19.input_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.19.mlp.down_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.19.mlp.gate_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.19.mlp.up_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.19.post_attention_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.19.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.19.self_attn.o_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.19.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.19.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.2.input_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.2.mlp.down_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.2.mlp.gate_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.2.mlp.up_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.2.post_attention_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.2.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.2.self_attn.o_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.2.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.2.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.20.input_layernorm.weight": "model-00002-of-00002.safetensors", + "model.layers.20.mlp.down_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.20.mlp.gate_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.20.mlp.up_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.20.post_attention_layernorm.weight": "model-00002-of-00002.safetensors", + "model.layers.20.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.20.self_attn.o_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.20.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.20.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.21.input_layernorm.weight": "model-00002-of-00002.safetensors", + "model.layers.21.mlp.down_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.21.mlp.gate_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.21.mlp.up_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.21.post_attention_layernorm.weight": "model-00002-of-00002.safetensors", + "model.layers.21.self_attn.k_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.21.self_attn.o_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.21.self_attn.q_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.21.self_attn.v_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.22.input_layernorm.weight": "model-00002-of-00002.safetensors", + "model.layers.22.mlp.down_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.22.mlp.gate_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.22.mlp.up_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.22.post_attention_layernorm.weight": "model-00002-of-00002.safetensors", + "model.layers.22.self_attn.k_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.22.self_attn.o_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.22.self_attn.q_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.22.self_attn.v_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.23.input_layernorm.weight": "model-00002-of-00002.safetensors", + "model.layers.23.mlp.down_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.23.mlp.gate_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.23.mlp.up_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.23.post_attention_layernorm.weight": "model-00002-of-00002.safetensors", + "model.layers.23.self_attn.k_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.23.self_attn.o_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.23.self_attn.q_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.23.self_attn.v_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.24.input_layernorm.weight": "model-00002-of-00002.safetensors", + "model.layers.24.mlp.down_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.24.mlp.gate_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.24.mlp.up_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.24.post_attention_layernorm.weight": "model-00002-of-00002.safetensors", + "model.layers.24.self_attn.k_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.24.self_attn.o_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.24.self_attn.q_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.24.self_attn.v_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.25.input_layernorm.weight": "model-00002-of-00002.safetensors", + "model.layers.25.mlp.down_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.25.mlp.gate_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.25.mlp.up_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.25.post_attention_layernorm.weight": "model-00002-of-00002.safetensors", + "model.layers.25.self_attn.k_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.25.self_attn.o_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.25.self_attn.q_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.25.self_attn.v_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.26.input_layernorm.weight": "model-00002-of-00002.safetensors", + "model.layers.26.mlp.down_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.26.mlp.gate_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.26.mlp.up_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.26.post_attention_layernorm.weight": "model-00002-of-00002.safetensors", + "model.layers.26.self_attn.k_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.26.self_attn.o_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.26.self_attn.q_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.26.self_attn.v_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.27.input_layernorm.weight": "model-00002-of-00002.safetensors", + "model.layers.27.mlp.down_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.27.mlp.gate_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.27.mlp.up_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.27.post_attention_layernorm.weight": "model-00002-of-00002.safetensors", + "model.layers.27.self_attn.k_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.27.self_attn.o_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.27.self_attn.q_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.27.self_attn.v_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.3.input_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.3.mlp.down_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.3.mlp.gate_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.3.mlp.up_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.3.post_attention_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.3.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.3.self_attn.o_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.3.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.3.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.4.input_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.4.mlp.down_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.4.mlp.gate_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.4.mlp.up_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.4.post_attention_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.4.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.4.self_attn.o_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.4.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.4.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.5.input_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.5.mlp.down_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.5.mlp.gate_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.5.mlp.up_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.5.post_attention_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.5.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.5.self_attn.o_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.5.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.5.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.6.input_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.6.mlp.down_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.6.mlp.gate_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.6.mlp.up_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.6.post_attention_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.6.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.6.self_attn.o_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.6.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.6.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.7.input_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.7.mlp.down_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.7.mlp.gate_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.7.mlp.up_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.7.post_attention_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.7.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.7.self_attn.o_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.7.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.7.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.8.input_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.8.mlp.down_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.8.mlp.gate_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.8.mlp.up_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.8.post_attention_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.8.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.8.self_attn.o_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.8.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.8.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.9.input_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.9.mlp.down_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.9.mlp.gate_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.9.mlp.up_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.9.post_attention_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.9.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.9.self_attn.o_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.9.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.9.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.norm.weight": "model-00002-of-00002.safetensors" + } +} diff --git a/checkpoint-192/special_tokens_map.json b/checkpoint-192/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..b43be96621d147110fb8a18b5776ec6e38516127 --- /dev/null +++ b/checkpoint-192/special_tokens_map.json @@ -0,0 +1,17 @@ +{ + "bos_token": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": "<|eot_id|>" +} diff --git a/checkpoint-192/tokenizer.json b/checkpoint-192/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..1c1d8d5c9024994f1d3b00f9662b8dd89ca13cf2 --- /dev/null +++ b/checkpoint-192/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6b9e4e7fb171f92fd137b777cc2714bf87d11576700a1dcd7a399e7bbe39537b +size 17209920 diff --git a/checkpoint-192/tokenizer_config.json b/checkpoint-192/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..e3cdb7bfcc59fd293867d98e124a30b3ddf39c93 --- /dev/null +++ b/checkpoint-192/tokenizer_config.json @@ -0,0 +1,2063 @@ +{ + "added_tokens_decoder": { + "128000": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128001": { + "content": "<|end_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128002": { + "content": "<|reserved_special_token_0|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128003": { + "content": "<|reserved_special_token_1|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128004": { + "content": "<|finetune_right_pad_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128005": { + "content": "<|reserved_special_token_2|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128006": { + "content": "<|start_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128007": { + "content": "<|end_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128008": { + "content": "<|eom_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128009": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128010": { + "content": "<|python_tag|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128011": { + "content": "<|reserved_special_token_3|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128012": { + "content": "<|reserved_special_token_4|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128013": { + "content": "<|reserved_special_token_5|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128014": { + "content": "<|reserved_special_token_6|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128015": { + "content": "<|reserved_special_token_7|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128016": { + "content": "<|reserved_special_token_8|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128017": { + "content": "<|reserved_special_token_9|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128018": { + "content": "<|reserved_special_token_10|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128019": { + "content": "<|reserved_special_token_11|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128020": { + "content": "<|reserved_special_token_12|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128021": { + "content": "<|reserved_special_token_13|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128022": { + "content": "<|reserved_special_token_14|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128023": { + "content": "<|reserved_special_token_15|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128024": { + "content": "<|reserved_special_token_16|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128025": { + "content": "<|reserved_special_token_17|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128026": { + "content": "<|reserved_special_token_18|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128027": { + "content": "<|reserved_special_token_19|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128028": { + "content": "<|reserved_special_token_20|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128029": { + "content": "<|reserved_special_token_21|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128030": { + "content": "<|reserved_special_token_22|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128031": { + "content": "<|reserved_special_token_23|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128032": { + "content": "<|reserved_special_token_24|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128033": { + "content": "<|reserved_special_token_25|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128034": { + "content": "<|reserved_special_token_26|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128035": { + "content": "<|reserved_special_token_27|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128036": { + "content": "<|reserved_special_token_28|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128037": { + "content": "<|reserved_special_token_29|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128038": { + "content": "<|reserved_special_token_30|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128039": { + "content": "<|reserved_special_token_31|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128040": { + "content": "<|reserved_special_token_32|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128041": { + "content": "<|reserved_special_token_33|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128042": { + "content": "<|reserved_special_token_34|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128043": { + "content": "<|reserved_special_token_35|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128044": { + "content": "<|reserved_special_token_36|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128045": { + "content": "<|reserved_special_token_37|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128046": { + "content": "<|reserved_special_token_38|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128047": { + "content": "<|reserved_special_token_39|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128048": { + "content": "<|reserved_special_token_40|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128049": { + "content": "<|reserved_special_token_41|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128050": { + "content": "<|reserved_special_token_42|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128051": { + "content": "<|reserved_special_token_43|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128052": { + "content": "<|reserved_special_token_44|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128053": { + "content": "<|reserved_special_token_45|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128054": { + "content": "<|reserved_special_token_46|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128055": { + "content": "<|reserved_special_token_47|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128056": { + "content": "<|reserved_special_token_48|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128057": { + "content": "<|reserved_special_token_49|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128058": { + "content": "<|reserved_special_token_50|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128059": { + "content": "<|reserved_special_token_51|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128060": { + "content": "<|reserved_special_token_52|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128061": { + "content": "<|reserved_special_token_53|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128062": { + "content": "<|reserved_special_token_54|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128063": { + "content": "<|reserved_special_token_55|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128064": { + "content": "<|reserved_special_token_56|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128065": { + "content": "<|reserved_special_token_57|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128066": { + "content": "<|reserved_special_token_58|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128067": { + "content": "<|reserved_special_token_59|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128068": { + "content": "<|reserved_special_token_60|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128069": { + "content": "<|reserved_special_token_61|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128070": { + "content": "<|reserved_special_token_62|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128071": { + "content": "<|reserved_special_token_63|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128072": { + "content": "<|reserved_special_token_64|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128073": { + "content": "<|reserved_special_token_65|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128074": { + "content": "<|reserved_special_token_66|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128075": { + "content": "<|reserved_special_token_67|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128076": { + "content": "<|reserved_special_token_68|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128077": { + "content": "<|reserved_special_token_69|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128078": { + "content": "<|reserved_special_token_70|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128079": { + "content": "<|reserved_special_token_71|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128080": { + "content": "<|reserved_special_token_72|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128081": { + "content": "<|reserved_special_token_73|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128082": { + "content": "<|reserved_special_token_74|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128083": { + "content": "<|reserved_special_token_75|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128084": { + "content": "<|reserved_special_token_76|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128085": { + "content": "<|reserved_special_token_77|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128086": { + "content": "<|reserved_special_token_78|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128087": { + "content": "<|reserved_special_token_79|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128088": { + "content": "<|reserved_special_token_80|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128089": { + "content": "<|reserved_special_token_81|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128090": { + "content": "<|reserved_special_token_82|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128091": { + "content": "<|reserved_special_token_83|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128092": { + "content": "<|reserved_special_token_84|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128093": { + "content": "<|reserved_special_token_85|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128094": { + "content": "<|reserved_special_token_86|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128095": { + "content": "<|reserved_special_token_87|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128096": { + "content": "<|reserved_special_token_88|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128097": { + "content": "<|reserved_special_token_89|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128098": { + "content": "<|reserved_special_token_90|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128099": { + "content": "<|reserved_special_token_91|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128100": { + "content": "<|reserved_special_token_92|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128101": { + "content": "<|reserved_special_token_93|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128102": { + "content": "<|reserved_special_token_94|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128103": { + "content": "<|reserved_special_token_95|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128104": { + "content": "<|reserved_special_token_96|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128105": { + "content": "<|reserved_special_token_97|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128106": { + "content": "<|reserved_special_token_98|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128107": { + "content": "<|reserved_special_token_99|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128108": { + "content": "<|reserved_special_token_100|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128109": { + "content": "<|reserved_special_token_101|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128110": { + "content": "<|reserved_special_token_102|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128111": { + "content": "<|reserved_special_token_103|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128112": { + "content": "<|reserved_special_token_104|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128113": { + "content": "<|reserved_special_token_105|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128114": { + "content": "<|reserved_special_token_106|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128115": { + "content": "<|reserved_special_token_107|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128116": { + "content": "<|reserved_special_token_108|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128117": { + "content": "<|reserved_special_token_109|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128118": { + "content": "<|reserved_special_token_110|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128119": { + "content": "<|reserved_special_token_111|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128120": { + "content": "<|reserved_special_token_112|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128121": { + "content": "<|reserved_special_token_113|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128122": { + "content": "<|reserved_special_token_114|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128123": { + "content": "<|reserved_special_token_115|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128124": { + "content": "<|reserved_special_token_116|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128125": { + "content": "<|reserved_special_token_117|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128126": { + "content": "<|reserved_special_token_118|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128127": { + "content": "<|reserved_special_token_119|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128128": { + "content": "<|reserved_special_token_120|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128129": { + "content": "<|reserved_special_token_121|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128130": { + "content": "<|reserved_special_token_122|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128131": { + "content": "<|reserved_special_token_123|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128132": { + "content": "<|reserved_special_token_124|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128133": { + "content": "<|reserved_special_token_125|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128134": { + "content": "<|reserved_special_token_126|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128135": { + "content": "<|reserved_special_token_127|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128136": { + "content": "<|reserved_special_token_128|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128137": { + "content": "<|reserved_special_token_129|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128138": { + "content": "<|reserved_special_token_130|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128139": { + "content": "<|reserved_special_token_131|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128140": { + "content": "<|reserved_special_token_132|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128141": { + "content": "<|reserved_special_token_133|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128142": { + "content": "<|reserved_special_token_134|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128143": { + "content": "<|reserved_special_token_135|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128144": { + "content": "<|reserved_special_token_136|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128145": { + "content": "<|reserved_special_token_137|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128146": { + "content": "<|reserved_special_token_138|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128147": { + "content": "<|reserved_special_token_139|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128148": { + "content": "<|reserved_special_token_140|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128149": { + "content": "<|reserved_special_token_141|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128150": { + "content": "<|reserved_special_token_142|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128151": { + "content": "<|reserved_special_token_143|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128152": { + "content": "<|reserved_special_token_144|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128153": { + "content": "<|reserved_special_token_145|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128154": { + "content": "<|reserved_special_token_146|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128155": { + "content": "<|reserved_special_token_147|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128156": { + "content": "<|reserved_special_token_148|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128157": { + "content": "<|reserved_special_token_149|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128158": { + "content": "<|reserved_special_token_150|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128159": { + "content": "<|reserved_special_token_151|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128160": { + "content": "<|reserved_special_token_152|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128161": { + "content": "<|reserved_special_token_153|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128162": { + "content": "<|reserved_special_token_154|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128163": { + "content": "<|reserved_special_token_155|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128164": { + "content": "<|reserved_special_token_156|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128165": { + "content": "<|reserved_special_token_157|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128166": { + "content": "<|reserved_special_token_158|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128167": { + "content": "<|reserved_special_token_159|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128168": { + "content": "<|reserved_special_token_160|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128169": { + "content": "<|reserved_special_token_161|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128170": { + "content": "<|reserved_special_token_162|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128171": { + "content": "<|reserved_special_token_163|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128172": { + "content": "<|reserved_special_token_164|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128173": { + "content": "<|reserved_special_token_165|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128174": { + "content": "<|reserved_special_token_166|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128175": { + "content": "<|reserved_special_token_167|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128176": { + "content": "<|reserved_special_token_168|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128177": { + "content": "<|reserved_special_token_169|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128178": { + "content": "<|reserved_special_token_170|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128179": { + "content": "<|reserved_special_token_171|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128180": { + "content": "<|reserved_special_token_172|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128181": { + "content": "<|reserved_special_token_173|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128182": { + "content": "<|reserved_special_token_174|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128183": { + "content": "<|reserved_special_token_175|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128184": { + "content": "<|reserved_special_token_176|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128185": { + "content": "<|reserved_special_token_177|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128186": { + "content": "<|reserved_special_token_178|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128187": { + "content": "<|reserved_special_token_179|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128188": { + "content": "<|reserved_special_token_180|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128189": { + "content": "<|reserved_special_token_181|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128190": { + "content": "<|reserved_special_token_182|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128191": { + "content": "<|reserved_special_token_183|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128192": { + "content": "<|reserved_special_token_184|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128193": { + "content": "<|reserved_special_token_185|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128194": { + "content": "<|reserved_special_token_186|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128195": { + "content": "<|reserved_special_token_187|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128196": { + "content": "<|reserved_special_token_188|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128197": { + "content": "<|reserved_special_token_189|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128198": { + "content": "<|reserved_special_token_190|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128199": { + "content": "<|reserved_special_token_191|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128200": { + "content": "<|reserved_special_token_192|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128201": { + "content": "<|reserved_special_token_193|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128202": { + "content": "<|reserved_special_token_194|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128203": { + "content": "<|reserved_special_token_195|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128204": { + "content": "<|reserved_special_token_196|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128205": { + "content": "<|reserved_special_token_197|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128206": { + "content": "<|reserved_special_token_198|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128207": { + "content": "<|reserved_special_token_199|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128208": { + "content": "<|reserved_special_token_200|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128209": { + "content": "<|reserved_special_token_201|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128210": { + "content": "<|reserved_special_token_202|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128211": { + "content": "<|reserved_special_token_203|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128212": { + "content": "<|reserved_special_token_204|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128213": { + "content": "<|reserved_special_token_205|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128214": { + "content": "<|reserved_special_token_206|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128215": { + "content": "<|reserved_special_token_207|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128216": { + "content": "<|reserved_special_token_208|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128217": { + "content": "<|reserved_special_token_209|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128218": { + "content": "<|reserved_special_token_210|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128219": { + "content": "<|reserved_special_token_211|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128220": { + "content": "<|reserved_special_token_212|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128221": { + "content": "<|reserved_special_token_213|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128222": { + "content": "<|reserved_special_token_214|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128223": { + "content": "<|reserved_special_token_215|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128224": { + "content": "<|reserved_special_token_216|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128225": { + "content": "<|reserved_special_token_217|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128226": { + "content": "<|reserved_special_token_218|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128227": { + "content": "<|reserved_special_token_219|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128228": { + "content": "<|reserved_special_token_220|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128229": { + "content": "<|reserved_special_token_221|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128230": { + "content": "<|reserved_special_token_222|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128231": { + "content": "<|reserved_special_token_223|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128232": { + "content": "<|reserved_special_token_224|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128233": { + "content": "<|reserved_special_token_225|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128234": { + "content": "<|reserved_special_token_226|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128235": { + "content": "<|reserved_special_token_227|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128236": { + "content": "<|reserved_special_token_228|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128237": { + "content": "<|reserved_special_token_229|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128238": { + "content": "<|reserved_special_token_230|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128239": { + "content": "<|reserved_special_token_231|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128240": { + "content": "<|reserved_special_token_232|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128241": { + "content": "<|reserved_special_token_233|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128242": { + "content": "<|reserved_special_token_234|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128243": { + "content": "<|reserved_special_token_235|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128244": { + "content": "<|reserved_special_token_236|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128245": { + "content": "<|reserved_special_token_237|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128246": { + "content": "<|reserved_special_token_238|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128247": { + "content": "<|reserved_special_token_239|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128248": { + "content": "<|reserved_special_token_240|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128249": { + "content": "<|reserved_special_token_241|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128250": { + "content": "<|reserved_special_token_242|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128251": { + "content": "<|reserved_special_token_243|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128252": { + "content": "<|reserved_special_token_244|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128253": { + "content": "<|reserved_special_token_245|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128254": { + "content": "<|reserved_special_token_246|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128255": { + "content": "<|reserved_special_token_247|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + } + }, + "bos_token": "<|begin_of_text|>", + "chat_template": "{{- bos_token }}\n{%- if custom_tools is defined %}\n {%- set tools = custom_tools %}\n{%- endif %}\n{%- if not tools_in_user_message is defined %}\n {%- set tools_in_user_message = true %}\n{%- endif %}\n{%- if not date_string is defined %}\n {%- if strftime_now is defined %}\n {%- set date_string = strftime_now(\"%d %b %Y\") %}\n {%- else %}\n {%- set date_string = \"26 Jul 2024\" %}\n {%- endif %}\n{%- endif %}\n{%- if not tools is defined %}\n {%- set tools = none %}\n{%- endif %}\n\n{#- This block extracts the system message, so we can slot it into the right place. #}\n{%- if messages[0]['role'] == 'system' %}\n {%- set system_message = messages[0]['content']|trim %}\n {%- set messages = messages[1:] %}\n{%- else %}\n {%- set system_message = \"\" %}\n{%- endif %}\n\n{#- System message #}\n{{- \"<|start_header_id|>system<|end_header_id|>\\n\\n\" }}\n{%- if tools is not none %}\n {{- \"Environment: ipython\\n\" }}\n{%- endif %}\n{{- \"Cutting Knowledge Date: December 2023\\n\" }}\n{{- \"Today Date: \" + date_string + \"\\n\\n\" }}\n{%- if tools is not none and not tools_in_user_message %}\n {{- \"You have access to the following functions. To call a function, please respond with JSON for a function call.\" }}\n {{- 'Respond in the format {\"name\": function name, \"parameters\": dictionary of argument name and its value}.' }}\n {{- \"Do not use variables.\\n\\n\" }}\n {%- for t in tools %}\n {{- t | tojson(indent=4) }}\n {{- \"\\n\\n\" }}\n {%- endfor %}\n{%- endif %}\n{{- system_message }}\n{{- \"<|eot_id|>\" }}\n\n{#- Custom tools are passed in a user message with some extra guidance #}\n{%- if tools_in_user_message and not tools is none %}\n {#- Extract the first user message so we can plug it in here #}\n {%- if messages | length != 0 %}\n {%- set first_user_message = messages[0]['content']|trim %}\n {%- set messages = messages[1:] %}\n {%- else %}\n {{- raise_exception(\"Cannot put tools in the first user message when there's no first user message!\") }}\n{%- endif %}\n {{- '<|start_header_id|>user<|end_header_id|>\\n\\n' -}}\n {{- \"Given the following functions, please respond with a JSON for a function call \" }}\n {{- \"with its proper arguments that best answers the given prompt.\\n\\n\" }}\n {{- 'Respond in the format {\"name\": function name, \"parameters\": dictionary of argument name and its value}.' }}\n {{- \"Do not use variables.\\n\\n\" }}\n {%- for t in tools %}\n {{- t | tojson(indent=4) }}\n {{- \"\\n\\n\" }}\n {%- endfor %}\n {{- first_user_message + \"<|eot_id|>\"}}\n{%- endif %}\n\n{%- for message in messages %}\n {%- if not (message.role == 'ipython' or message.role == 'tool' or 'tool_calls' in message) %}\n {{- '<|start_header_id|>' + message['role'] + '<|end_header_id|>\\n\\n'+ message['content'] | trim + '<|eot_id|>' }}\n {%- elif 'tool_calls' in message %}\n {%- if not message.tool_calls|length == 1 %}\n {{- raise_exception(\"This model only supports single tool-calls at once!\") }}\n {%- endif %}\n {%- set tool_call = message.tool_calls[0].function %}\n {{- '<|start_header_id|>assistant<|end_header_id|>\\n\\n' -}}\n {{- '{\"name\": \"' + tool_call.name + '\", ' }}\n {{- '\"parameters\": ' }}\n {{- tool_call.arguments | tojson }}\n {{- \"}\" }}\n {{- \"<|eot_id|>\" }}\n {%- elif message.role == \"tool\" or message.role == \"ipython\" %}\n {{- \"<|start_header_id|>ipython<|end_header_id|>\\n\\n\" }}\n {%- if message.content is mapping or message.content is iterable %}\n {{- message.content | tojson }}\n {%- else %}\n {{- message.content }}\n {%- endif %}\n {{- \"<|eot_id|>\" }}\n {%- endif %}\n{%- endfor %}\n{%- if add_generation_prompt %}\n {{- '<|start_header_id|>assistant<|end_header_id|>\\n\\n' }}\n{%- endif %}\n", + "clean_up_tokenization_spaces": true, + "eos_token": "<|eot_id|>", + "model_input_names": [ + "input_ids", + "attention_mask" + ], + "model_max_length": 2048, + "pad_token": "<|eot_id|>", + "tokenizer_class": "PreTrainedTokenizerFast" +} diff --git a/checkpoint-192/trainer_state.json b/checkpoint-192/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..57e0963ec90c88828d828b64e9ca2554553eea10 --- /dev/null +++ b/checkpoint-192/trainer_state.json @@ -0,0 +1,2913 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 0.40594648166501485, + "eval_steps": 500, + "global_step": 192, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.002114304592005286, + "grad_norm": 1.5018059015274048, + "learning_rate": 2.083333333333333e-08, + "logits/chosen": -0.3466828167438507, + "logits/rejected": -0.30099987983703613, + "logps/chosen": -0.9345186948776245, + "logps/rejected": -0.9117153882980347, + "loss": 1.4889, + "rewards/accuracies": 0.53125, + "rewards/chosen": -1.869037389755249, + "rewards/margins": -0.04560665041208267, + "rewards/rejected": -1.8234307765960693, + "step": 1 + }, + { + "epoch": 0.004228609184010572, + "grad_norm": 0.8093975186347961, + "learning_rate": 4.166666666666666e-08, + "logits/chosen": -0.4310421049594879, + "logits/rejected": -0.39132067561149597, + "logps/chosen": -0.8198825716972351, + "logps/rejected": -0.8644211888313293, + "loss": 1.376, + "rewards/accuracies": 0.515625, + "rewards/chosen": -1.6397651433944702, + "rewards/margins": 0.08907715976238251, + "rewards/rejected": -1.7288423776626587, + "step": 2 + }, + { + "epoch": 0.006342913776015857, + "grad_norm": 0.5377389788627625, + "learning_rate": 6.25e-08, + "logits/chosen": -0.46692028641700745, + "logits/rejected": -0.4649256467819214, + "logps/chosen": -0.9087910652160645, + "logps/rejected": -0.9648240804672241, + "loss": 1.3404, + "rewards/accuracies": 0.5703125, + "rewards/chosen": -1.817582130432129, + "rewards/margins": 0.11206617206335068, + "rewards/rejected": -1.9296481609344482, + "step": 3 + }, + { + "epoch": 0.008457218368021144, + "grad_norm": 0.3221875727176666, + "learning_rate": 8.333333333333333e-08, + "logits/chosen": -0.416828453540802, + "logits/rejected": -0.3584724962711334, + "logps/chosen": -0.7818898558616638, + "logps/rejected": -0.8170815110206604, + "loss": 1.3806, + "rewards/accuracies": 0.484375, + "rewards/chosen": -1.5637797117233276, + "rewards/margins": 0.07038339227437973, + "rewards/rejected": -1.6341630220413208, + "step": 4 + }, + { + "epoch": 0.010571522960026428, + "grad_norm": 0.64655601978302, + "learning_rate": 1.0416666666666667e-07, + "logits/chosen": -0.376886248588562, + "logits/rejected": -0.3516141474246979, + "logps/chosen": -0.8814125061035156, + "logps/rejected": -1.0214396715164185, + "loss": 1.2741, + "rewards/accuracies": 0.625, + "rewards/chosen": -1.7628250122070312, + "rewards/margins": 0.28005433082580566, + "rewards/rejected": -2.042879343032837, + "step": 5 + }, + { + "epoch": 0.012685827552031714, + "grad_norm": 0.4775894582271576, + "learning_rate": 1.25e-07, + "logits/chosen": -0.4757865369319916, + "logits/rejected": -0.4498941898345947, + "logps/chosen": -0.8962199687957764, + "logps/rejected": -0.9462199807167053, + "loss": 1.364, + "rewards/accuracies": 0.5625, + "rewards/chosen": -1.7924399375915527, + "rewards/margins": 0.10000008344650269, + "rewards/rejected": -1.8924399614334106, + "step": 6 + }, + { + "epoch": 0.014800132144037, + "grad_norm": 1.2459568977355957, + "learning_rate": 1.4583333333333335e-07, + "logits/chosen": -0.38895344734191895, + "logits/rejected": -0.38165366649627686, + "logps/chosen": -0.9025766253471375, + "logps/rejected": -0.9465017318725586, + "loss": 1.3898, + "rewards/accuracies": 0.59375, + "rewards/chosen": -1.805153250694275, + "rewards/margins": 0.0878501906991005, + "rewards/rejected": -1.8930034637451172, + "step": 7 + }, + { + "epoch": 0.016914436736042288, + "grad_norm": 0.6195729374885559, + "learning_rate": 1.6666666666666665e-07, + "logits/chosen": -0.3964853286743164, + "logits/rejected": -0.377862811088562, + "logps/chosen": -0.9054160118103027, + "logps/rejected": -0.9605879187583923, + "loss": 1.3821, + "rewards/accuracies": 0.4765625, + "rewards/chosen": -1.8108320236206055, + "rewards/margins": 0.1103438138961792, + "rewards/rejected": -1.9211758375167847, + "step": 8 + }, + { + "epoch": 0.019028741328047574, + "grad_norm": 1.2074137926101685, + "learning_rate": 1.875e-07, + "logits/chosen": -0.3729037344455719, + "logits/rejected": -0.38143450021743774, + "logps/chosen": -0.9328653216362, + "logps/rejected": -0.9905799627304077, + "loss": 1.3754, + "rewards/accuracies": 0.5078125, + "rewards/chosen": -1.8657306432724, + "rewards/margins": 0.11542946100234985, + "rewards/rejected": -1.9811599254608154, + "step": 9 + }, + { + "epoch": 0.021143045920052856, + "grad_norm": 0.2867220640182495, + "learning_rate": 2.0833333333333333e-07, + "logits/chosen": -0.4263336658477783, + "logits/rejected": -0.42903271317481995, + "logps/chosen": -0.8979260325431824, + "logps/rejected": -0.9078099727630615, + "loss": 1.4438, + "rewards/accuracies": 0.515625, + "rewards/chosen": -1.7958520650863647, + "rewards/margins": 0.019767940044403076, + "rewards/rejected": -1.815619945526123, + "step": 10 + }, + { + "epoch": 0.023257350512058142, + "grad_norm": 0.8363026976585388, + "learning_rate": 2.2916666666666663e-07, + "logits/chosen": -0.3374914526939392, + "logits/rejected": -0.32399696111679077, + "logps/chosen": -0.8886098861694336, + "logps/rejected": -0.9484556317329407, + "loss": 1.3422, + "rewards/accuracies": 0.546875, + "rewards/chosen": -1.7772197723388672, + "rewards/margins": 0.11969132721424103, + "rewards/rejected": -1.8969112634658813, + "step": 11 + }, + { + "epoch": 0.025371655104063428, + "grad_norm": 0.5406804084777832, + "learning_rate": 2.5e-07, + "logits/chosen": -0.42844679951667786, + "logits/rejected": -0.37984615564346313, + "logps/chosen": -0.861629843711853, + "logps/rejected": -0.8968492150306702, + "loss": 1.3922, + "rewards/accuracies": 0.5, + "rewards/chosen": -1.723259687423706, + "rewards/margins": 0.07043875753879547, + "rewards/rejected": -1.7936984300613403, + "step": 12 + }, + { + "epoch": 0.027485959696068714, + "grad_norm": 0.9919329285621643, + "learning_rate": 2.708333333333333e-07, + "logits/chosen": -0.36495402455329895, + "logits/rejected": -0.3249490261077881, + "logps/chosen": -0.8502095937728882, + "logps/rejected": -0.8470643758773804, + "loss": 1.4334, + "rewards/accuracies": 0.5078125, + "rewards/chosen": -1.7004191875457764, + "rewards/margins": -0.006290358491241932, + "rewards/rejected": -1.6941287517547607, + "step": 13 + }, + { + "epoch": 0.029600264288074, + "grad_norm": 0.5477162003517151, + "learning_rate": 2.916666666666667e-07, + "logits/chosen": -0.4155704081058502, + "logits/rejected": -0.39535820484161377, + "logps/chosen": -1.0430240631103516, + "logps/rejected": -1.1318373680114746, + "loss": 1.3533, + "rewards/accuracies": 0.6015625, + "rewards/chosen": -2.086048126220703, + "rewards/margins": 0.17762640118598938, + "rewards/rejected": -2.263674736022949, + "step": 14 + }, + { + "epoch": 0.031714568880079286, + "grad_norm": 0.26530712842941284, + "learning_rate": 3.1249999999999997e-07, + "logits/chosen": -0.4810572564601898, + "logits/rejected": -0.42454615235328674, + "logps/chosen": -0.8741041421890259, + "logps/rejected": -0.9494178295135498, + "loss": 1.3655, + "rewards/accuracies": 0.5, + "rewards/chosen": -1.7482082843780518, + "rewards/margins": 0.15062758326530457, + "rewards/rejected": -1.8988356590270996, + "step": 15 + }, + { + "epoch": 0.033828873472084575, + "grad_norm": 0.9272629618644714, + "learning_rate": 3.333333333333333e-07, + "logits/chosen": -0.4440098702907562, + "logits/rejected": -0.3930297791957855, + "logps/chosen": -0.8473359942436218, + "logps/rejected": -0.9369213581085205, + "loss": 1.3248, + "rewards/accuracies": 0.5625, + "rewards/chosen": -1.6946719884872437, + "rewards/margins": 0.17917080223560333, + "rewards/rejected": -1.873842716217041, + "step": 16 + }, + { + "epoch": 0.03594317806408986, + "grad_norm": 0.5912418961524963, + "learning_rate": 3.541666666666667e-07, + "logits/chosen": -0.3838099539279938, + "logits/rejected": -0.3507584035396576, + "logps/chosen": -0.8888350129127502, + "logps/rejected": -0.9361770749092102, + "loss": 1.383, + "rewards/accuracies": 0.4921875, + "rewards/chosen": -1.7776700258255005, + "rewards/margins": 0.0946839451789856, + "rewards/rejected": -1.8723541498184204, + "step": 17 + }, + { + "epoch": 0.03805748265609515, + "grad_norm": 0.6536504030227661, + "learning_rate": 3.75e-07, + "logits/chosen": -0.3581697940826416, + "logits/rejected": -0.3620460629463196, + "logps/chosen": -0.8519617319107056, + "logps/rejected": -0.9022184610366821, + "loss": 1.3841, + "rewards/accuracies": 0.5078125, + "rewards/chosen": -1.7039234638214111, + "rewards/margins": 0.10051343590021133, + "rewards/rejected": -1.8044369220733643, + "step": 18 + }, + { + "epoch": 0.04017178724810043, + "grad_norm": 0.3433632552623749, + "learning_rate": 3.958333333333333e-07, + "logits/chosen": -0.37887442111968994, + "logits/rejected": -0.37543320655822754, + "logps/chosen": -0.9464104175567627, + "logps/rejected": -1.0017329454421997, + "loss": 1.3649, + "rewards/accuracies": 0.5703125, + "rewards/chosen": -1.8928208351135254, + "rewards/margins": 0.11064518243074417, + "rewards/rejected": -2.0034658908843994, + "step": 19 + }, + { + "epoch": 0.04228609184010571, + "grad_norm": 0.9764007329940796, + "learning_rate": 4.1666666666666667e-07, + "logits/chosen": -0.44110679626464844, + "logits/rejected": -0.4280649721622467, + "logps/chosen": -0.9046768546104431, + "logps/rejected": -1.0464633703231812, + "loss": 1.2592, + "rewards/accuracies": 0.625, + "rewards/chosen": -1.8093537092208862, + "rewards/margins": 0.2835729420185089, + "rewards/rejected": -2.0929267406463623, + "step": 20 + }, + { + "epoch": 0.044400396432111, + "grad_norm": 1.8563830852508545, + "learning_rate": 4.375e-07, + "logits/chosen": -0.45183491706848145, + "logits/rejected": -0.42935287952423096, + "logps/chosen": -0.9043138027191162, + "logps/rejected": -0.9462392926216125, + "loss": 1.3784, + "rewards/accuracies": 0.5625, + "rewards/chosen": -1.8086276054382324, + "rewards/margins": 0.08385094255208969, + "rewards/rejected": -1.892478585243225, + "step": 21 + }, + { + "epoch": 0.046514701024116284, + "grad_norm": 1.3473299741744995, + "learning_rate": 4.5833333333333327e-07, + "logits/chosen": -0.37855517864227295, + "logits/rejected": -0.34429043531417847, + "logps/chosen": -0.9284683465957642, + "logps/rejected": -0.9454050064086914, + "loss": 1.4346, + "rewards/accuracies": 0.5234375, + "rewards/chosen": -1.8569366931915283, + "rewards/margins": 0.03387312963604927, + "rewards/rejected": -1.8908100128173828, + "step": 22 + }, + { + "epoch": 0.04862900561612157, + "grad_norm": 0.940831184387207, + "learning_rate": 4.791666666666667e-07, + "logits/chosen": -0.39172160625457764, + "logits/rejected": -0.3695780634880066, + "logps/chosen": -0.9314202666282654, + "logps/rejected": -1.020229697227478, + "loss": 1.3322, + "rewards/accuracies": 0.5703125, + "rewards/chosen": -1.8628405332565308, + "rewards/margins": 0.17761869728565216, + "rewards/rejected": -2.040459394454956, + "step": 23 + }, + { + "epoch": 0.050743310208126856, + "grad_norm": 0.5783158540725708, + "learning_rate": 5e-07, + "logits/chosen": -0.4958629608154297, + "logits/rejected": -0.4257377088069916, + "logps/chosen": -0.9379237294197083, + "logps/rejected": -0.9415461421012878, + "loss": 1.441, + "rewards/accuracies": 0.515625, + "rewards/chosen": -1.8758474588394165, + "rewards/margins": 0.0072449808940291405, + "rewards/rejected": -1.8830922842025757, + "step": 24 + }, + { + "epoch": 0.052857614800132145, + "grad_norm": 1.4209853410720825, + "learning_rate": 5.208333333333334e-07, + "logits/chosen": -0.36407172679901123, + "logits/rejected": -0.3331725299358368, + "logps/chosen": -0.9192589521408081, + "logps/rejected": -0.9595308899879456, + "loss": 1.3994, + "rewards/accuracies": 0.5078125, + "rewards/chosen": -1.8385179042816162, + "rewards/margins": 0.080544114112854, + "rewards/rejected": -1.9190617799758911, + "step": 25 + }, + { + "epoch": 0.05497191939213743, + "grad_norm": 0.6310216188430786, + "learning_rate": 5.416666666666666e-07, + "logits/chosen": -0.41772690415382385, + "logits/rejected": -0.36565953493118286, + "logps/chosen": -0.8052878379821777, + "logps/rejected": -0.8673746585845947, + "loss": 1.3356, + "rewards/accuracies": 0.5859375, + "rewards/chosen": -1.6105756759643555, + "rewards/margins": 0.12417369335889816, + "rewards/rejected": -1.7347493171691895, + "step": 26 + }, + { + "epoch": 0.05708622398414272, + "grad_norm": 1.2933462858200073, + "learning_rate": 5.625e-07, + "logits/chosen": -0.4482795000076294, + "logits/rejected": -0.39409321546554565, + "logps/chosen": -0.8339261412620544, + "logps/rejected": -0.8675202131271362, + "loss": 1.3739, + "rewards/accuracies": 0.4921875, + "rewards/chosen": -1.6678522825241089, + "rewards/margins": 0.06718815863132477, + "rewards/rejected": -1.7350404262542725, + "step": 27 + }, + { + "epoch": 0.059200528576148, + "grad_norm": 0.5808025002479553, + "learning_rate": 5.833333333333334e-07, + "logits/chosen": -0.37116044759750366, + "logits/rejected": -0.3478051722049713, + "logps/chosen": -0.8950318694114685, + "logps/rejected": -0.9756672978401184, + "loss": 1.3505, + "rewards/accuracies": 0.5625, + "rewards/chosen": -1.790063738822937, + "rewards/margins": 0.1612708568572998, + "rewards/rejected": -1.9513345956802368, + "step": 28 + }, + { + "epoch": 0.06131483316815329, + "grad_norm": 1.0569533109664917, + "learning_rate": 6.041666666666666e-07, + "logits/chosen": -0.421148419380188, + "logits/rejected": -0.38443076610565186, + "logps/chosen": -0.8021283745765686, + "logps/rejected": -0.8370179533958435, + "loss": 1.3916, + "rewards/accuracies": 0.46875, + "rewards/chosen": -1.6042567491531372, + "rewards/margins": 0.06977920234203339, + "rewards/rejected": -1.674035906791687, + "step": 29 + }, + { + "epoch": 0.06342913776015857, + "grad_norm": 0.42577147483825684, + "learning_rate": 6.249999999999999e-07, + "logits/chosen": -0.4429818391799927, + "logits/rejected": -0.3524704575538635, + "logps/chosen": -0.8916822671890259, + "logps/rejected": -0.8985542058944702, + "loss": 1.4321, + "rewards/accuracies": 0.5234375, + "rewards/chosen": -1.7833645343780518, + "rewards/margins": 0.01374388113617897, + "rewards/rejected": -1.7971084117889404, + "step": 30 + }, + { + "epoch": 0.06554344235216386, + "grad_norm": 1.0056904554367065, + "learning_rate": 6.458333333333333e-07, + "logits/chosen": -0.376451700925827, + "logits/rejected": -0.342519074678421, + "logps/chosen": -0.9038617014884949, + "logps/rejected": -0.953092634677887, + "loss": 1.398, + "rewards/accuracies": 0.4765625, + "rewards/chosen": -1.8077234029769897, + "rewards/margins": 0.09846188127994537, + "rewards/rejected": -1.906185269355774, + "step": 31 + }, + { + "epoch": 0.06765774694416915, + "grad_norm": 0.5494012236595154, + "learning_rate": 6.666666666666666e-07, + "logits/chosen": -0.3459138870239258, + "logits/rejected": -0.3590989410877228, + "logps/chosen": -0.8274999260902405, + "logps/rejected": -0.8776509761810303, + "loss": 1.363, + "rewards/accuracies": 0.5625, + "rewards/chosen": -1.654999852180481, + "rewards/margins": 0.1003020703792572, + "rewards/rejected": -1.7553019523620605, + "step": 32 + }, + { + "epoch": 0.06977205153617443, + "grad_norm": 0.693267822265625, + "learning_rate": 6.875e-07, + "logits/chosen": -0.40053680539131165, + "logits/rejected": -0.37323904037475586, + "logps/chosen": -0.8255244493484497, + "logps/rejected": -0.8658804893493652, + "loss": 1.3712, + "rewards/accuracies": 0.578125, + "rewards/chosen": -1.6510488986968994, + "rewards/margins": 0.08071210980415344, + "rewards/rejected": -1.7317609786987305, + "step": 33 + }, + { + "epoch": 0.07188635612817972, + "grad_norm": 2.213238000869751, + "learning_rate": 7.083333333333334e-07, + "logits/chosen": -0.40097948908805847, + "logits/rejected": -0.38190510869026184, + "logps/chosen": -0.9122671484947205, + "logps/rejected": -0.9549552798271179, + "loss": 1.36, + "rewards/accuracies": 0.5390625, + "rewards/chosen": -1.824534296989441, + "rewards/margins": 0.0853763073682785, + "rewards/rejected": -1.9099105596542358, + "step": 34 + }, + { + "epoch": 0.074000660720185, + "grad_norm": 0.6859830021858215, + "learning_rate": 7.291666666666666e-07, + "logits/chosen": -0.42501094937324524, + "logits/rejected": -0.42549416422843933, + "logps/chosen": -1.0008373260498047, + "logps/rejected": -1.1157118082046509, + "loss": 1.3294, + "rewards/accuracies": 0.5546875, + "rewards/chosen": -2.0016746520996094, + "rewards/margins": 0.22974897921085358, + "rewards/rejected": -2.2314236164093018, + "step": 35 + }, + { + "epoch": 0.0761149653121903, + "grad_norm": 0.6468721628189087, + "learning_rate": 7.5e-07, + "logits/chosen": -0.36494994163513184, + "logits/rejected": -0.30433908104896545, + "logps/chosen": -0.9062094688415527, + "logps/rejected": -0.920263409614563, + "loss": 1.4312, + "rewards/accuracies": 0.4921875, + "rewards/chosen": -1.8124189376831055, + "rewards/margins": 0.02810765616595745, + "rewards/rejected": -1.840526819229126, + "step": 36 + }, + { + "epoch": 0.07822926990419557, + "grad_norm": 0.5085556507110596, + "learning_rate": 7.708333333333333e-07, + "logits/chosen": -0.4677881598472595, + "logits/rejected": -0.456132709980011, + "logps/chosen": -1.0101865530014038, + "logps/rejected": -1.0429682731628418, + "loss": 1.4132, + "rewards/accuracies": 0.5234375, + "rewards/chosen": -2.0203731060028076, + "rewards/margins": 0.06556359678506851, + "rewards/rejected": -2.0859365463256836, + "step": 37 + }, + { + "epoch": 0.08034357449620086, + "grad_norm": 0.23813335597515106, + "learning_rate": 7.916666666666666e-07, + "logits/chosen": -0.3991190791130066, + "logits/rejected": -0.3664044141769409, + "logps/chosen": -0.9578174352645874, + "logps/rejected": -0.9229263067245483, + "loss": 1.4824, + "rewards/accuracies": 0.4609375, + "rewards/chosen": -1.9156348705291748, + "rewards/margins": -0.06978224962949753, + "rewards/rejected": -1.8458526134490967, + "step": 38 + }, + { + "epoch": 0.08245787908820615, + "grad_norm": 0.587037980556488, + "learning_rate": 8.125e-07, + "logits/chosen": -0.37554049491882324, + "logits/rejected": -0.36305734515190125, + "logps/chosen": -0.8503091931343079, + "logps/rejected": -0.864615261554718, + "loss": 1.4086, + "rewards/accuracies": 0.4296875, + "rewards/chosen": -1.7006183862686157, + "rewards/margins": 0.028611989691853523, + "rewards/rejected": -1.729230523109436, + "step": 39 + }, + { + "epoch": 0.08457218368021142, + "grad_norm": 0.4172501862049103, + "learning_rate": 8.333333333333333e-07, + "logits/chosen": -0.4405443072319031, + "logits/rejected": -0.41723060607910156, + "logps/chosen": -0.8502858877182007, + "logps/rejected": -0.9114271402359009, + "loss": 1.3446, + "rewards/accuracies": 0.546875, + "rewards/chosen": -1.7005717754364014, + "rewards/margins": 0.12228240817785263, + "rewards/rejected": -1.8228542804718018, + "step": 40 + }, + { + "epoch": 0.08668648827221671, + "grad_norm": 0.9275372624397278, + "learning_rate": 8.541666666666666e-07, + "logits/chosen": -0.4200601577758789, + "logits/rejected": -0.3478623628616333, + "logps/chosen": -0.892408013343811, + "logps/rejected": -0.9276402592658997, + "loss": 1.3887, + "rewards/accuracies": 0.46875, + "rewards/chosen": -1.784816026687622, + "rewards/margins": 0.07046431303024292, + "rewards/rejected": -1.8552805185317993, + "step": 41 + }, + { + "epoch": 0.088800792864222, + "grad_norm": 0.7317383289337158, + "learning_rate": 8.75e-07, + "logits/chosen": -0.37675267457962036, + "logits/rejected": -0.33540332317352295, + "logps/chosen": -0.7866061925888062, + "logps/rejected": -0.824250340461731, + "loss": 1.3837, + "rewards/accuracies": 0.5078125, + "rewards/chosen": -1.5732123851776123, + "rewards/margins": 0.07528844475746155, + "rewards/rejected": -1.648500680923462, + "step": 42 + }, + { + "epoch": 0.09091509745622729, + "grad_norm": 0.9452736973762512, + "learning_rate": 8.958333333333334e-07, + "logits/chosen": -0.4662383198738098, + "logits/rejected": -0.4447881579399109, + "logps/chosen": -0.9490666389465332, + "logps/rejected": -1.0112388134002686, + "loss": 1.3412, + "rewards/accuracies": 0.5390625, + "rewards/chosen": -1.8981332778930664, + "rewards/margins": 0.12434446066617966, + "rewards/rejected": -2.022477626800537, + "step": 43 + }, + { + "epoch": 0.09302940204823257, + "grad_norm": 0.2848323881626129, + "learning_rate": 9.166666666666665e-07, + "logits/chosen": -0.41404005885124207, + "logits/rejected": -0.3944583535194397, + "logps/chosen": -0.8224930167198181, + "logps/rejected": -0.8416361808776855, + "loss": 1.4027, + "rewards/accuracies": 0.609375, + "rewards/chosen": -1.6449860334396362, + "rewards/margins": 0.038286346942186356, + "rewards/rejected": -1.683272361755371, + "step": 44 + }, + { + "epoch": 0.09514370664023786, + "grad_norm": 0.7165678143501282, + "learning_rate": 9.374999999999999e-07, + "logits/chosen": -0.40475326776504517, + "logits/rejected": -0.3559921383857727, + "logps/chosen": -0.8070214986801147, + "logps/rejected": -0.8993593454360962, + "loss": 1.3148, + "rewards/accuracies": 0.53125, + "rewards/chosen": -1.6140429973602295, + "rewards/margins": 0.18467575311660767, + "rewards/rejected": -1.7987186908721924, + "step": 45 + }, + { + "epoch": 0.09725801123224315, + "grad_norm": 0.4779021739959717, + "learning_rate": 9.583333333333334e-07, + "logits/chosen": -0.4171525835990906, + "logits/rejected": -0.42166149616241455, + "logps/chosen": -0.7872560024261475, + "logps/rejected": -0.8496187925338745, + "loss": 1.3356, + "rewards/accuracies": 0.5546875, + "rewards/chosen": -1.574512004852295, + "rewards/margins": 0.12472567707300186, + "rewards/rejected": -1.699237585067749, + "step": 46 + }, + { + "epoch": 0.09937231582424844, + "grad_norm": 0.7870219349861145, + "learning_rate": 9.791666666666667e-07, + "logits/chosen": -0.3734116554260254, + "logits/rejected": -0.32778748869895935, + "logps/chosen": -0.7842286825180054, + "logps/rejected": -0.8161548972129822, + "loss": 1.3647, + "rewards/accuracies": 0.5078125, + "rewards/chosen": -1.5684573650360107, + "rewards/margins": 0.06385258585214615, + "rewards/rejected": -1.6323097944259644, + "step": 47 + }, + { + "epoch": 0.10148662041625371, + "grad_norm": 0.2597256600856781, + "learning_rate": 1e-06, + "logits/chosen": -0.4355677664279938, + "logits/rejected": -0.38983187079429626, + "logps/chosen": -0.8787693977355957, + "logps/rejected": -0.9383041262626648, + "loss": 1.35, + "rewards/accuracies": 0.5546875, + "rewards/chosen": -1.7575387954711914, + "rewards/margins": 0.11906948685646057, + "rewards/rejected": -1.8766082525253296, + "step": 48 + }, + { + "epoch": 0.103600925008259, + "grad_norm": 0.9942799210548401, + "learning_rate": 9.999862751990697e-07, + "logits/chosen": -0.4244321882724762, + "logits/rejected": -0.4366786777973175, + "logps/chosen": -0.7910157442092896, + "logps/rejected": -0.8630884885787964, + "loss": 1.3166, + "rewards/accuracies": 0.5546875, + "rewards/chosen": -1.582031488418579, + "rewards/margins": 0.14414538443088531, + "rewards/rejected": -1.7261769771575928, + "step": 49 + }, + { + "epoch": 0.10571522960026429, + "grad_norm": 0.5333903431892395, + "learning_rate": 9.999451015497595e-07, + "logits/chosen": -0.389942467212677, + "logits/rejected": -0.36674585938453674, + "logps/chosen": -0.7312074899673462, + "logps/rejected": -0.7289648652076721, + "loss": 1.4225, + "rewards/accuracies": 0.4765625, + "rewards/chosen": -1.4624149799346924, + "rewards/margins": -0.004485193639993668, + "rewards/rejected": -1.4579297304153442, + "step": 50 + }, + { + "epoch": 0.10782953419226958, + "grad_norm": 0.5712242722511292, + "learning_rate": 9.9987648131247e-07, + "logits/chosen": -0.4622853994369507, + "logits/rejected": -0.3728552460670471, + "logps/chosen": -0.8764299750328064, + "logps/rejected": -0.869678795337677, + "loss": 1.4542, + "rewards/accuracies": 0.53125, + "rewards/chosen": -1.7528599500656128, + "rewards/margins": -0.013502337038516998, + "rewards/rejected": -1.739357590675354, + "step": 51 + }, + { + "epoch": 0.10994383878427486, + "grad_norm": 0.2586441934108734, + "learning_rate": 9.99780418254397e-07, + "logits/chosen": -0.37249019742012024, + "logits/rejected": -0.3998304605484009, + "logps/chosen": -0.8435611724853516, + "logps/rejected": -0.9359882473945618, + "loss": 1.3057, + "rewards/accuracies": 0.625, + "rewards/chosen": -1.6871223449707031, + "rewards/margins": 0.18485431373119354, + "rewards/rejected": -1.8719764947891235, + "step": 52 + }, + { + "epoch": 0.11205814337628014, + "grad_norm": 1.0829113721847534, + "learning_rate": 9.996569176493268e-07, + "logits/chosen": -0.47697725892066956, + "logits/rejected": -0.4208195209503174, + "logps/chosen": -0.8014968037605286, + "logps/rejected": -0.8703804612159729, + "loss": 1.3523, + "rewards/accuracies": 0.5703125, + "rewards/chosen": -1.6029936075210571, + "rewards/margins": 0.1377674788236618, + "rewards/rejected": -1.7407609224319458, + "step": 53 + }, + { + "epoch": 0.11417244796828543, + "grad_norm": 0.5523208379745483, + "learning_rate": 9.995059862773438e-07, + "logits/chosen": -0.40533363819122314, + "logits/rejected": -0.36801978945732117, + "logps/chosen": -0.7641825675964355, + "logps/rejected": -0.8168596029281616, + "loss": 1.3692, + "rewards/accuracies": 0.53125, + "rewards/chosen": -1.528365135192871, + "rewards/margins": 0.10535416752099991, + "rewards/rejected": -1.6337192058563232, + "step": 54 + }, + { + "epoch": 0.11628675256029072, + "grad_norm": 0.614101767539978, + "learning_rate": 9.993276324244605e-07, + "logits/chosen": -0.4476906955242157, + "logits/rejected": -0.40396648645401, + "logps/chosen": -0.8706808090209961, + "logps/rejected": -0.9221430420875549, + "loss": 1.3787, + "rewards/accuracies": 0.515625, + "rewards/chosen": -1.7413616180419922, + "rewards/margins": 0.10292442888021469, + "rewards/rejected": -1.8442860841751099, + "step": 55 + }, + { + "epoch": 0.118401057152296, + "grad_norm": 0.3428778052330017, + "learning_rate": 9.991218658821608e-07, + "logits/chosen": -0.31709593534469604, + "logits/rejected": -0.2760937213897705, + "logps/chosen": -0.842248797416687, + "logps/rejected": -0.8068034648895264, + "loss": 1.498, + "rewards/accuracies": 0.4375, + "rewards/chosen": -1.684497594833374, + "rewards/margins": -0.07089066505432129, + "rewards/rejected": -1.6136069297790527, + "step": 56 + }, + { + "epoch": 0.12051536174430129, + "grad_norm": 0.6877723932266235, + "learning_rate": 9.988886979468643e-07, + "logits/chosen": -0.41800016164779663, + "logits/rejected": -0.4011584222316742, + "logps/chosen": -0.7845420837402344, + "logps/rejected": -0.834447979927063, + "loss": 1.3491, + "rewards/accuracies": 0.546875, + "rewards/chosen": -1.5690841674804688, + "rewards/margins": 0.09981165081262589, + "rewards/rejected": -1.668895959854126, + "step": 57 + }, + { + "epoch": 0.12262966633630658, + "grad_norm": 0.9649701714515686, + "learning_rate": 9.98628141419305e-07, + "logits/chosen": -0.4253537058830261, + "logits/rejected": -0.4305458962917328, + "logps/chosen": -0.86476731300354, + "logps/rejected": -0.9080386161804199, + "loss": 1.3639, + "rewards/accuracies": 0.515625, + "rewards/chosen": -1.72953462600708, + "rewards/margins": 0.08654248714447021, + "rewards/rejected": -1.8160772323608398, + "step": 58 + }, + { + "epoch": 0.12474397092831185, + "grad_norm": 1.3779780864715576, + "learning_rate": 9.98340210603829e-07, + "logits/chosen": -0.39970022439956665, + "logits/rejected": -0.441428005695343, + "logps/chosen": -0.8662775158882141, + "logps/rejected": -0.9646260738372803, + "loss": 1.3001, + "rewards/accuracies": 0.6484375, + "rewards/chosen": -1.7325550317764282, + "rewards/margins": 0.19669723510742188, + "rewards/rejected": -1.9292521476745605, + "step": 59 + }, + { + "epoch": 0.12685827552031714, + "grad_norm": 0.5366966724395752, + "learning_rate": 9.980249213076084e-07, + "logits/chosen": -0.37770116329193115, + "logits/rejected": -0.35231757164001465, + "logps/chosen": -0.8165755867958069, + "logps/rejected": -0.8619179129600525, + "loss": 1.3699, + "rewards/accuracies": 0.578125, + "rewards/chosen": -1.6331511735916138, + "rewards/margins": 0.09068439900875092, + "rewards/rejected": -1.723835825920105, + "step": 60 + }, + { + "epoch": 0.12897258011232243, + "grad_norm": 0.36810922622680664, + "learning_rate": 9.976822908397748e-07, + "logits/chosen": -0.4224976897239685, + "logits/rejected": -0.41758257150650024, + "logps/chosen": -0.8445641994476318, + "logps/rejected": -0.9393664598464966, + "loss": 1.3193, + "rewards/accuracies": 0.5390625, + "rewards/chosen": -1.6891283988952637, + "rewards/margins": 0.18960458040237427, + "rewards/rejected": -1.8787329196929932, + "step": 61 + }, + { + "epoch": 0.13108688470432772, + "grad_norm": 0.6838279366493225, + "learning_rate": 9.97312338010468e-07, + "logits/chosen": -0.4168627858161926, + "logits/rejected": -0.36115381121635437, + "logps/chosen": -0.8370552659034729, + "logps/rejected": -0.8352169394493103, + "loss": 1.4284, + "rewards/accuracies": 0.5703125, + "rewards/chosen": -1.6741105318069458, + "rewards/margins": -0.0036766715347766876, + "rewards/rejected": -1.6704338788986206, + "step": 62 + }, + { + "epoch": 0.133201189296333, + "grad_norm": 0.39330533146858215, + "learning_rate": 9.969150831298037e-07, + "logits/chosen": -0.4558233618736267, + "logits/rejected": -0.4025765061378479, + "logps/chosen": -0.826255738735199, + "logps/rejected": -0.894213080406189, + "loss": 1.3485, + "rewards/accuracies": 0.5, + "rewards/chosen": -1.652511477470398, + "rewards/margins": 0.13591471314430237, + "rewards/rejected": -1.788426160812378, + "step": 63 + }, + { + "epoch": 0.1353154938883383, + "grad_norm": 0.6055929660797119, + "learning_rate": 9.964905480067584e-07, + "logits/chosen": -0.459463506937027, + "logits/rejected": -0.42943331599235535, + "logps/chosen": -0.7901928424835205, + "logps/rejected": -0.7964221239089966, + "loss": 1.4057, + "rewards/accuracies": 0.484375, + "rewards/chosen": -1.580385684967041, + "rewards/margins": 0.012458762153983116, + "rewards/rejected": -1.5928442478179932, + "step": 64 + }, + { + "epoch": 0.13742979848034356, + "grad_norm": 0.37883859872817993, + "learning_rate": 9.960387559479725e-07, + "logits/chosen": -0.4447207450866699, + "logits/rejected": -0.371269553899765, + "logps/chosen": -0.7863065004348755, + "logps/rejected": -0.7983666658401489, + "loss": 1.4202, + "rewards/accuracies": 0.4609375, + "rewards/chosen": -1.572613000869751, + "rewards/margins": 0.02412020042538643, + "rewards/rejected": -1.5967333316802979, + "step": 65 + }, + { + "epoch": 0.13954410307234885, + "grad_norm": 0.31330156326293945, + "learning_rate": 9.955597317564703e-07, + "logits/chosen": -0.42059677839279175, + "logits/rejected": -0.37605100870132446, + "logps/chosen": -0.7669360637664795, + "logps/rejected": -0.8348797559738159, + "loss": 1.3368, + "rewards/accuracies": 0.5390625, + "rewards/chosen": -1.533872127532959, + "rewards/margins": 0.13588732481002808, + "rewards/rejected": -1.6697595119476318, + "step": 66 + }, + { + "epoch": 0.14165840766435414, + "grad_norm": 0.4353170096874237, + "learning_rate": 9.950535017302983e-07, + "logits/chosen": -0.3897082805633545, + "logits/rejected": -0.38229796290397644, + "logps/chosen": -0.7249190807342529, + "logps/rejected": -0.7696882486343384, + "loss": 1.3511, + "rewards/accuracies": 0.5546875, + "rewards/chosen": -1.4498381614685059, + "rewards/margins": 0.08953814208507538, + "rewards/rejected": -1.5393764972686768, + "step": 67 + }, + { + "epoch": 0.14377271225635943, + "grad_norm": 0.6724106669425964, + "learning_rate": 9.94520093661082e-07, + "logits/chosen": -0.3687596023082733, + "logits/rejected": -0.34222811460494995, + "logps/chosen": -0.7845972776412964, + "logps/rejected": -0.8308086395263672, + "loss": 1.3737, + "rewards/accuracies": 0.5, + "rewards/chosen": -1.5691945552825928, + "rewards/margins": 0.09242270141839981, + "rewards/rejected": -1.6616172790527344, + "step": 68 + }, + { + "epoch": 0.14588701684836472, + "grad_norm": 0.7312172651290894, + "learning_rate": 9.939595368324994e-07, + "logits/chosen": -0.4475817382335663, + "logits/rejected": -0.3975730538368225, + "logps/chosen": -0.7314785718917847, + "logps/rejected": -0.7924487590789795, + "loss": 1.3439, + "rewards/accuracies": 0.5390625, + "rewards/chosen": -1.4629571437835693, + "rewards/margins": 0.12194043397903442, + "rewards/rejected": -1.584897518157959, + "step": 69 + }, + { + "epoch": 0.14800132144037, + "grad_norm": 0.301097571849823, + "learning_rate": 9.933718620186744e-07, + "logits/chosen": -0.402032732963562, + "logits/rejected": -0.3640722632408142, + "logps/chosen": -0.7727882862091064, + "logps/rejected": -0.8291516304016113, + "loss": 1.358, + "rewards/accuracies": 0.5546875, + "rewards/chosen": -1.545576572418213, + "rewards/margins": 0.11272668838500977, + "rewards/rejected": -1.6583032608032227, + "step": 70 + }, + { + "epoch": 0.1501156260323753, + "grad_norm": 0.377835750579834, + "learning_rate": 9.92757101482486e-07, + "logits/chosen": -0.316825270652771, + "logits/rejected": -0.3245603144168854, + "logps/chosen": -0.7962774634361267, + "logps/rejected": -0.8610175848007202, + "loss": 1.3464, + "rewards/accuracies": 0.5859375, + "rewards/chosen": -1.5925549268722534, + "rewards/margins": 0.12948019802570343, + "rewards/rejected": -1.7220351696014404, + "step": 71 + }, + { + "epoch": 0.1522299306243806, + "grad_norm": 0.84058678150177, + "learning_rate": 9.921152889737984e-07, + "logits/chosen": -0.4446060359477997, + "logits/rejected": -0.43160340189933777, + "logps/chosen": -0.7745426297187805, + "logps/rejected": -0.8286185264587402, + "loss": 1.3408, + "rewards/accuracies": 0.5703125, + "rewards/chosen": -1.549085259437561, + "rewards/margins": 0.10815180093050003, + "rewards/rejected": -1.6572370529174805, + "step": 72 + }, + { + "epoch": 0.15434423521638585, + "grad_norm": 0.6970808506011963, + "learning_rate": 9.91446459727607e-07, + "logits/chosen": -0.4220297634601593, + "logits/rejected": -0.404453307390213, + "logps/chosen": -0.8769615888595581, + "logps/rejected": -0.9564313292503357, + "loss": 1.3423, + "rewards/accuracies": 0.6328125, + "rewards/chosen": -1.7539231777191162, + "rewards/margins": 0.15893957018852234, + "rewards/rejected": -1.9128626585006714, + "step": 73 + }, + { + "epoch": 0.15645853980839114, + "grad_norm": 0.5900676250457764, + "learning_rate": 9.90750650462105e-07, + "logits/chosen": -0.41884100437164307, + "logits/rejected": -0.38551777601242065, + "logps/chosen": -0.814996063709259, + "logps/rejected": -0.8892688751220703, + "loss": 1.3325, + "rewards/accuracies": 0.5546875, + "rewards/chosen": -1.629992127418518, + "rewards/margins": 0.14854571223258972, + "rewards/rejected": -1.7785377502441406, + "step": 74 + }, + { + "epoch": 0.15857284440039643, + "grad_norm": 0.7245749831199646, + "learning_rate": 9.900278993766668e-07, + "logits/chosen": -0.3451727330684662, + "logits/rejected": -0.3348972201347351, + "logps/chosen": -0.8788102865219116, + "logps/rejected": -0.9273182153701782, + "loss": 1.377, + "rewards/accuracies": 0.5078125, + "rewards/chosen": -1.7576205730438232, + "rewards/margins": 0.09701582789421082, + "rewards/rejected": -1.8546364307403564, + "step": 75 + }, + { + "epoch": 0.16068714899240172, + "grad_norm": 0.3135383725166321, + "learning_rate": 9.89278246149752e-07, + "logits/chosen": -0.4140404760837555, + "logits/rejected": -0.38082340359687805, + "logps/chosen": -0.7513999342918396, + "logps/rejected": -0.7880118489265442, + "loss": 1.3921, + "rewards/accuracies": 0.53125, + "rewards/chosen": -1.5027998685836792, + "rewards/margins": 0.07322371751070023, + "rewards/rejected": -1.5760236978530884, + "step": 76 + }, + { + "epoch": 0.162801453584407, + "grad_norm": 0.8594076633453369, + "learning_rate": 9.885017319367252e-07, + "logits/chosen": -0.35951656103134155, + "logits/rejected": -0.30456626415252686, + "logps/chosen": -0.7989844679832458, + "logps/rejected": -0.8145395517349243, + "loss": 1.4061, + "rewards/accuracies": 0.4765625, + "rewards/chosen": -1.5979689359664917, + "rewards/margins": 0.031110182404518127, + "rewards/rejected": -1.6290791034698486, + "step": 77 + }, + { + "epoch": 0.1649157581764123, + "grad_norm": 0.4393538534641266, + "learning_rate": 9.876983993675989e-07, + "logits/chosen": -0.33119240403175354, + "logits/rejected": -0.31970253586769104, + "logps/chosen": -0.721772313117981, + "logps/rejected": -0.7868390679359436, + "loss": 1.3325, + "rewards/accuracies": 0.5390625, + "rewards/chosen": -1.443544626235962, + "rewards/margins": 0.13013358414173126, + "rewards/rejected": -1.5736781358718872, + "step": 78 + }, + { + "epoch": 0.1670300627684176, + "grad_norm": 0.8017925024032593, + "learning_rate": 9.868682925446909e-07, + "logits/chosen": -0.3608989417552948, + "logits/rejected": -0.3497124910354614, + "logps/chosen": -0.8117240071296692, + "logps/rejected": -0.8506529331207275, + "loss": 1.3671, + "rewards/accuracies": 0.53125, + "rewards/chosen": -1.6234480142593384, + "rewards/margins": 0.07785768806934357, + "rewards/rejected": -1.701305866241455, + "step": 79 + }, + { + "epoch": 0.16914436736042285, + "grad_norm": 0.5704994201660156, + "learning_rate": 9.860114570402054e-07, + "logits/chosen": -0.37441548705101013, + "logits/rejected": -0.32188406586647034, + "logps/chosen": -0.745419442653656, + "logps/rejected": -0.816170871257782, + "loss": 1.3315, + "rewards/accuracies": 0.484375, + "rewards/chosen": -1.490838885307312, + "rewards/margins": 0.1415030062198639, + "rewards/rejected": -1.632341742515564, + "step": 80 + }, + { + "epoch": 0.17125867195242814, + "grad_norm": 3.0005106925964355, + "learning_rate": 9.85127939893729e-07, + "logits/chosen": -0.34791454672813416, + "logits/rejected": -0.32542383670806885, + "logps/chosen": -0.7547991275787354, + "logps/rejected": -0.7868378162384033, + "loss": 1.3742, + "rewards/accuracies": 0.515625, + "rewards/chosen": -1.5095982551574707, + "rewards/margins": 0.06407731771469116, + "rewards/rejected": -1.5736756324768066, + "step": 81 + }, + { + "epoch": 0.17337297654443343, + "grad_norm": 0.5891271829605103, + "learning_rate": 9.842177896096493e-07, + "logits/chosen": -0.38649702072143555, + "logits/rejected": -0.36892226338386536, + "logps/chosen": -0.7556143999099731, + "logps/rejected": -0.82858806848526, + "loss": 1.3233, + "rewards/accuracies": 0.5703125, + "rewards/chosen": -1.5112287998199463, + "rewards/margins": 0.14594702422618866, + "rewards/rejected": -1.65717613697052, + "step": 82 + }, + { + "epoch": 0.17548728113643872, + "grad_norm": 0.27861157059669495, + "learning_rate": 9.832810561544923e-07, + "logits/chosen": -0.38264670968055725, + "logits/rejected": -0.35908499360084534, + "logps/chosen": -0.7858557105064392, + "logps/rejected": -0.8571599721908569, + "loss": 1.3234, + "rewards/accuracies": 0.578125, + "rewards/chosen": -1.5717114210128784, + "rewards/margins": 0.1426086127758026, + "rewards/rejected": -1.7143199443817139, + "step": 83 + }, + { + "epoch": 0.177601585728444, + "grad_norm": 0.8158763647079468, + "learning_rate": 9.823177909541793e-07, + "logits/chosen": -0.4076104760169983, + "logits/rejected": -0.3934200704097748, + "logps/chosen": -0.8089872002601624, + "logps/rejected": -0.8885407447814941, + "loss": 1.3476, + "rewards/accuracies": 0.5390625, + "rewards/chosen": -1.6179744005203247, + "rewards/margins": 0.15910708904266357, + "rewards/rejected": -1.7770814895629883, + "step": 84 + }, + { + "epoch": 0.1797158903204493, + "grad_norm": 0.46008333563804626, + "learning_rate": 9.813280468912022e-07, + "logits/chosen": -0.33124151825904846, + "logits/rejected": -0.34535717964172363, + "logps/chosen": -0.733020544052124, + "logps/rejected": -0.8716557621955872, + "loss": 1.2807, + "rewards/accuracies": 0.5859375, + "rewards/chosen": -1.466041088104248, + "rewards/margins": 0.27727028727531433, + "rewards/rejected": -1.7433115243911743, + "step": 85 + }, + { + "epoch": 0.18183019491245458, + "grad_norm": 0.3784334659576416, + "learning_rate": 9.80311878301722e-07, + "logits/chosen": -0.40713849663734436, + "logits/rejected": -0.3808574080467224, + "logps/chosen": -0.7063947319984436, + "logps/rejected": -0.7589148879051208, + "loss": 1.3501, + "rewards/accuracies": 0.515625, + "rewards/chosen": -1.4127894639968872, + "rewards/margins": 0.10504024475812912, + "rewards/rejected": -1.5178297758102417, + "step": 86 + }, + { + "epoch": 0.18394449950445987, + "grad_norm": 0.6137813329696655, + "learning_rate": 9.792693409725853e-07, + "logits/chosen": -0.4119255542755127, + "logits/rejected": -0.44221603870391846, + "logps/chosen": -0.795850932598114, + "logps/rejected": -0.8925026059150696, + "loss": 1.2987, + "rewards/accuracies": 0.5859375, + "rewards/chosen": -1.591701865196228, + "rewards/margins": 0.19330324232578278, + "rewards/rejected": -1.7850052118301392, + "step": 87 + }, + { + "epoch": 0.18605880409646514, + "grad_norm": 0.5354250073432922, + "learning_rate": 9.78200492138261e-07, + "logits/chosen": -0.3792279064655304, + "logits/rejected": -0.3789527714252472, + "logps/chosen": -0.7249161005020142, + "logps/rejected": -0.8088154792785645, + "loss": 1.3304, + "rewards/accuracies": 0.515625, + "rewards/chosen": -1.4498322010040283, + "rewards/margins": 0.16779886186122894, + "rewards/rejected": -1.617630958557129, + "step": 88 + }, + { + "epoch": 0.18817310868847043, + "grad_norm": 0.6911243796348572, + "learning_rate": 9.771053904776995e-07, + "logits/chosen": -0.38837429881095886, + "logits/rejected": -0.36597418785095215, + "logps/chosen": -0.7528612017631531, + "logps/rejected": -0.7981135249137878, + "loss": 1.3481, + "rewards/accuracies": 0.6171875, + "rewards/chosen": -1.5057224035263062, + "rewards/margins": 0.09050464630126953, + "rewards/rejected": -1.5962270498275757, + "step": 89 + }, + { + "epoch": 0.19028741328047571, + "grad_norm": 0.37110790610313416, + "learning_rate": 9.759840961111097e-07, + "logits/chosen": -0.3804919421672821, + "logits/rejected": -0.38750600814819336, + "logps/chosen": -0.8673248291015625, + "logps/rejected": -0.9381619691848755, + "loss": 1.3303, + "rewards/accuracies": 0.5859375, + "rewards/chosen": -1.734649658203125, + "rewards/margins": 0.14167429506778717, + "rewards/rejected": -1.876323938369751, + "step": 90 + }, + { + "epoch": 0.192401717872481, + "grad_norm": 0.8033086657524109, + "learning_rate": 9.748366705966593e-07, + "logits/chosen": -0.3804866075515747, + "logits/rejected": -0.31055447459220886, + "logps/chosen": -0.7535511255264282, + "logps/rejected": -0.7824290990829468, + "loss": 1.3706, + "rewards/accuracies": 0.546875, + "rewards/chosen": -1.5071022510528564, + "rewards/margins": 0.057755980640649796, + "rewards/rejected": -1.5648581981658936, + "step": 91 + }, + { + "epoch": 0.1945160224644863, + "grad_norm": 0.6934167742729187, + "learning_rate": 9.736631769270957e-07, + "logits/chosen": -0.443461149930954, + "logits/rejected": -0.4398806691169739, + "logps/chosen": -0.8123858571052551, + "logps/rejected": -0.8972252607345581, + "loss": 1.3464, + "rewards/accuracies": 0.578125, + "rewards/chosen": -1.6247717142105103, + "rewards/margins": 0.16967862844467163, + "rewards/rejected": -1.7944505214691162, + "step": 92 + }, + { + "epoch": 0.19663032705649158, + "grad_norm": 1.140067458152771, + "learning_rate": 9.724636795262866e-07, + "logits/chosen": -0.43793433904647827, + "logits/rejected": -0.4402340352535248, + "logps/chosen": -0.8155819177627563, + "logps/rejected": -0.8659977912902832, + "loss": 1.3621, + "rewards/accuracies": 0.5, + "rewards/chosen": -1.6311638355255127, + "rewards/margins": 0.10083187371492386, + "rewards/rejected": -1.7319955825805664, + "step": 93 + }, + { + "epoch": 0.19874463164849687, + "grad_norm": 1.8303897380828857, + "learning_rate": 9.712382442456844e-07, + "logits/chosen": -0.34288379549980164, + "logits/rejected": -0.36632782220840454, + "logps/chosen": -0.7338054776191711, + "logps/rejected": -0.8537961840629578, + "loss": 1.2942, + "rewards/accuracies": 0.5859375, + "rewards/chosen": -1.4676109552383423, + "rewards/margins": 0.23998141288757324, + "rewards/rejected": -1.7075923681259155, + "step": 94 + }, + { + "epoch": 0.20085893624050213, + "grad_norm": 0.34392252564430237, + "learning_rate": 9.6998693836071e-07, + "logits/chosen": -0.4381723999977112, + "logits/rejected": -0.4031081199645996, + "logps/chosen": -0.7130292057991028, + "logps/rejected": -0.7402217388153076, + "loss": 1.3766, + "rewards/accuracies": 0.5859375, + "rewards/chosen": -1.4260584115982056, + "rewards/margins": 0.0543849840760231, + "rewards/rejected": -1.4804434776306152, + "step": 95 + }, + { + "epoch": 0.20297324083250742, + "grad_norm": 0.4129842519760132, + "learning_rate": 9.687098305670604e-07, + "logits/chosen": -0.39796924591064453, + "logits/rejected": -0.3476859927177429, + "logps/chosen": -0.7520885467529297, + "logps/rejected": -0.8058558702468872, + "loss": 1.3663, + "rewards/accuracies": 0.53125, + "rewards/chosen": -1.5041770935058594, + "rewards/margins": 0.10753461718559265, + "rewards/rejected": -1.6117117404937744, + "step": 96 + }, + { + "epoch": 0.2050875454245127, + "grad_norm": 0.5054985284805298, + "learning_rate": 9.674069909769362e-07, + "logits/chosen": -0.3942393660545349, + "logits/rejected": -0.3627544045448303, + "logps/chosen": -0.733702540397644, + "logps/rejected": -0.781308650970459, + "loss": 1.342, + "rewards/accuracies": 0.5859375, + "rewards/chosen": -1.467405080795288, + "rewards/margins": 0.09521210938692093, + "rewards/rejected": -1.562617301940918, + "step": 97 + }, + { + "epoch": 0.207201850016518, + "grad_norm": 0.6975870728492737, + "learning_rate": 9.66078491115194e-07, + "logits/chosen": -0.38557127118110657, + "logits/rejected": -0.3581204414367676, + "logps/chosen": -0.7359838485717773, + "logps/rejected": -0.7648134827613831, + "loss": 1.3841, + "rewards/accuracies": 0.5234375, + "rewards/chosen": -1.4719676971435547, + "rewards/margins": 0.057659298181533813, + "rewards/rejected": -1.5296269655227661, + "step": 98 + }, + { + "epoch": 0.2093161546085233, + "grad_norm": 0.7575029730796814, + "learning_rate": 9.647244039154177e-07, + "logits/chosen": -0.3871467411518097, + "logits/rejected": -0.3941374123096466, + "logps/chosen": -0.6516871452331543, + "logps/rejected": -0.7066073417663574, + "loss": 1.3364, + "rewards/accuracies": 0.5390625, + "rewards/chosen": -1.3033742904663086, + "rewards/margins": 0.10984040796756744, + "rewards/rejected": -1.4132146835327148, + "step": 99 + }, + { + "epoch": 0.21143045920052858, + "grad_norm": 1.3344250917434692, + "learning_rate": 9.633448037159166e-07, + "logits/chosen": -0.40887755155563354, + "logits/rejected": -0.41733911633491516, + "logps/chosen": -0.6978950500488281, + "logps/rejected": -0.793424129486084, + "loss": 1.3076, + "rewards/accuracies": 0.609375, + "rewards/chosen": -1.3957901000976562, + "rewards/margins": 0.1910584717988968, + "rewards/rejected": -1.586848258972168, + "step": 100 + }, + { + "epoch": 0.21354476379253387, + "grad_norm": 0.8798456788063049, + "learning_rate": 9.619397662556433e-07, + "logits/chosen": -0.302534282207489, + "logits/rejected": -0.29954588413238525, + "logps/chosen": -0.719552755355835, + "logps/rejected": -0.7628123164176941, + "loss": 1.3699, + "rewards/accuracies": 0.5390625, + "rewards/chosen": -1.43910551071167, + "rewards/margins": 0.08651915192604065, + "rewards/rejected": -1.5256246328353882, + "step": 101 + }, + { + "epoch": 0.21565906838453916, + "grad_norm": 0.8746365308761597, + "learning_rate": 9.605093686700353e-07, + "logits/chosen": -0.372263640165329, + "logits/rejected": -0.3714321255683899, + "logps/chosen": -0.6665956974029541, + "logps/rejected": -0.7361368536949158, + "loss": 1.3173, + "rewards/accuracies": 0.5546875, + "rewards/chosen": -1.3331913948059082, + "rewards/margins": 0.1390824019908905, + "rewards/rejected": -1.4722737073898315, + "step": 102 + }, + { + "epoch": 0.21777337297654442, + "grad_norm": 0.8450930714607239, + "learning_rate": 9.590536894867812e-07, + "logits/chosen": -0.37228280305862427, + "logits/rejected": -0.37763556838035583, + "logps/chosen": -0.7425979375839233, + "logps/rejected": -0.7557005882263184, + "loss": 1.4085, + "rewards/accuracies": 0.5390625, + "rewards/chosen": -1.4851958751678467, + "rewards/margins": 0.026205357164144516, + "rewards/rejected": -1.5114011764526367, + "step": 103 + }, + { + "epoch": 0.2198876775685497, + "grad_norm": 0.5075035691261292, + "learning_rate": 9.575728086215091e-07, + "logits/chosen": -0.4433964788913727, + "logits/rejected": -0.3782787024974823, + "logps/chosen": -0.7308244109153748, + "logps/rejected": -0.8043883442878723, + "loss": 1.3353, + "rewards/accuracies": 0.5703125, + "rewards/chosen": -1.4616488218307495, + "rewards/margins": 0.14712783694267273, + "rewards/rejected": -1.6087766885757446, + "step": 104 + }, + { + "epoch": 0.222001982160555, + "grad_norm": 1.0270946025848389, + "learning_rate": 9.560668073733993e-07, + "logits/chosen": -0.3593980073928833, + "logits/rejected": -0.3159312903881073, + "logps/chosen": -0.757469892501831, + "logps/rejected": -0.8256179094314575, + "loss": 1.3289, + "rewards/accuracies": 0.5546875, + "rewards/chosen": -1.514939785003662, + "rewards/margins": 0.13629598915576935, + "rewards/rejected": -1.651235818862915, + "step": 105 + }, + { + "epoch": 0.2241162867525603, + "grad_norm": 0.8390078544616699, + "learning_rate": 9.54535768420721e-07, + "logits/chosen": -0.3266332149505615, + "logits/rejected": -0.3008713722229004, + "logps/chosen": -0.7286102771759033, + "logps/rejected": -0.7803273797035217, + "loss": 1.3593, + "rewards/accuracies": 0.5078125, + "rewards/chosen": -1.4572205543518066, + "rewards/margins": 0.10343428701162338, + "rewards/rejected": -1.5606547594070435, + "step": 106 + }, + { + "epoch": 0.22623059134456558, + "grad_norm": 1.130595088005066, + "learning_rate": 9.529797758162934e-07, + "logits/chosen": -0.36109817028045654, + "logits/rejected": -0.34797021746635437, + "logps/chosen": -0.7723361253738403, + "logps/rejected": -0.8873662352561951, + "loss": 1.2956, + "rewards/accuracies": 0.625, + "rewards/chosen": -1.5446722507476807, + "rewards/margins": 0.23006024956703186, + "rewards/rejected": -1.7747324705123901, + "step": 107 + }, + { + "epoch": 0.22834489593657087, + "grad_norm": 0.9399718642234802, + "learning_rate": 9.513989149828717e-07, + "logits/chosen": -0.3596777021884918, + "logits/rejected": -0.3660539388656616, + "logps/chosen": -0.7130635976791382, + "logps/rejected": -0.7378955483436584, + "loss": 1.3774, + "rewards/accuracies": 0.4765625, + "rewards/chosen": -1.4261271953582764, + "rewards/margins": 0.04966379329562187, + "rewards/rejected": -1.475791096687317, + "step": 108 + }, + { + "epoch": 0.23045920052857616, + "grad_norm": 1.097594976425171, + "learning_rate": 9.49793272708457e-07, + "logits/chosen": -0.31783169507980347, + "logits/rejected": -0.3008044362068176, + "logps/chosen": -0.6933202147483826, + "logps/rejected": -0.7510000467300415, + "loss": 1.3498, + "rewards/accuracies": 0.5390625, + "rewards/chosen": -1.3866404294967651, + "rewards/margins": 0.1153596043586731, + "rewards/rejected": -1.502000093460083, + "step": 109 + }, + { + "epoch": 0.23257350512058145, + "grad_norm": 1.096330165863037, + "learning_rate": 9.481629371415313e-07, + "logits/chosen": -0.3582899570465088, + "logits/rejected": -0.3120020031929016, + "logps/chosen": -0.817268431186676, + "logps/rejected": -0.8862374424934387, + "loss": 1.3349, + "rewards/accuracies": 0.5625, + "rewards/chosen": -1.634536862373352, + "rewards/margins": 0.13793781399726868, + "rewards/rejected": -1.7724748849868774, + "step": 110 + }, + { + "epoch": 0.2346878097125867, + "grad_norm": 0.8261978626251221, + "learning_rate": 9.465079977862192e-07, + "logits/chosen": -0.41336673498153687, + "logits/rejected": -0.39544352889060974, + "logps/chosen": -0.7673372030258179, + "logps/rejected": -0.8331737518310547, + "loss": 1.3373, + "rewards/accuracies": 0.59375, + "rewards/chosen": -1.5346744060516357, + "rewards/margins": 0.1316729635000229, + "rewards/rejected": -1.6663475036621094, + "step": 111 + }, + { + "epoch": 0.236802114304592, + "grad_norm": 0.5922806262969971, + "learning_rate": 9.448285454973737e-07, + "logits/chosen": -0.3224758207798004, + "logits/rejected": -0.3118049204349518, + "logps/chosen": -0.7584627866744995, + "logps/rejected": -0.8859898447990417, + "loss": 1.2731, + "rewards/accuracies": 0.625, + "rewards/chosen": -1.516925573348999, + "rewards/margins": 0.2550540566444397, + "rewards/rejected": -1.7719796895980835, + "step": 112 + }, + { + "epoch": 0.2389164188965973, + "grad_norm": 1.3172541856765747, + "learning_rate": 9.431246724755877e-07, + "logits/chosen": -0.4287208318710327, + "logits/rejected": -0.3984590172767639, + "logps/chosen": -0.7587048411369324, + "logps/rejected": -0.7860502004623413, + "loss": 1.3832, + "rewards/accuracies": 0.4765625, + "rewards/chosen": -1.5174096822738647, + "rewards/margins": 0.05469079315662384, + "rewards/rejected": -1.5721004009246826, + "step": 113 + }, + { + "epoch": 0.24103072348860258, + "grad_norm": 0.7749882340431213, + "learning_rate": 9.413964722621337e-07, + "logits/chosen": -0.39085906744003296, + "logits/rejected": -0.3316206932067871, + "logps/chosen": -0.7035898566246033, + "logps/rejected": -0.7375759482383728, + "loss": 1.387, + "rewards/accuracies": 0.5546875, + "rewards/chosen": -1.4071797132492065, + "rewards/margins": 0.06797221302986145, + "rewards/rejected": -1.4751518964767456, + "step": 114 + }, + { + "epoch": 0.24314502808060787, + "grad_norm": 1.0914056301116943, + "learning_rate": 9.396440397338272e-07, + "logits/chosen": -0.38826486468315125, + "logits/rejected": -0.35520774126052856, + "logps/chosen": -0.7385872602462769, + "logps/rejected": -0.7974889278411865, + "loss": 1.3477, + "rewards/accuracies": 0.5, + "rewards/chosen": -1.4771745204925537, + "rewards/margins": 0.11780343949794769, + "rewards/rejected": -1.594977855682373, + "step": 115 + }, + { + "epoch": 0.24525933267261316, + "grad_norm": 1.2966018915176392, + "learning_rate": 9.378674710978183e-07, + "logits/chosen": -0.36493802070617676, + "logits/rejected": -0.34763696789741516, + "logps/chosen": -0.6731826663017273, + "logps/rejected": -0.7645149827003479, + "loss": 1.3, + "rewards/accuracies": 0.5859375, + "rewards/chosen": -1.3463653326034546, + "rewards/margins": 0.18266455829143524, + "rewards/rejected": -1.5290299654006958, + "step": 116 + }, + { + "epoch": 0.24737363726461845, + "grad_norm": 0.49401605129241943, + "learning_rate": 9.360668638863109e-07, + "logits/chosen": -0.40416795015335083, + "logits/rejected": -0.3815993070602417, + "logps/chosen": -0.719497799873352, + "logps/rejected": -0.7588324546813965, + "loss": 1.3621, + "rewards/accuracies": 0.5625, + "rewards/chosen": -1.438995599746704, + "rewards/margins": 0.07866920530796051, + "rewards/rejected": -1.517664909362793, + "step": 117 + }, + { + "epoch": 0.2494879418566237, + "grad_norm": 1.0603238344192505, + "learning_rate": 9.342423169512071e-07, + "logits/chosen": -0.3857055604457855, + "logits/rejected": -0.3524513244628906, + "logps/chosen": -0.7373769283294678, + "logps/rejected": -0.7971038818359375, + "loss": 1.3358, + "rewards/accuracies": 0.59375, + "rewards/chosen": -1.4747538566589355, + "rewards/margins": 0.11945393681526184, + "rewards/rejected": -1.594207763671875, + "step": 118 + }, + { + "epoch": 0.251602246448629, + "grad_norm": 0.9880490303039551, + "learning_rate": 9.323939304586804e-07, + "logits/chosen": -0.31455785036087036, + "logits/rejected": -0.3102484941482544, + "logps/chosen": -0.7276102900505066, + "logps/rejected": -0.7446941137313843, + "loss": 1.3928, + "rewards/accuracies": 0.546875, + "rewards/chosen": -1.4552205801010132, + "rewards/margins": 0.034167706966400146, + "rewards/rejected": -1.4893882274627686, + "step": 119 + }, + { + "epoch": 0.2537165510406343, + "grad_norm": 1.0419566631317139, + "learning_rate": 9.305218058836776e-07, + "logits/chosen": -0.38093918561935425, + "logits/rejected": -0.3588898181915283, + "logps/chosen": -0.715582013130188, + "logps/rejected": -0.8271002769470215, + "loss": 1.2934, + "rewards/accuracies": 0.6015625, + "rewards/chosen": -1.431164026260376, + "rewards/margins": 0.22303667664527893, + "rewards/rejected": -1.654200553894043, + "step": 120 + }, + { + "epoch": 0.2558308556326396, + "grad_norm": 0.657620370388031, + "learning_rate": 9.286260460043473e-07, + "logits/chosen": -0.45690783858299255, + "logits/rejected": -0.4082674980163574, + "logps/chosen": -0.6932571530342102, + "logps/rejected": -0.7631082534790039, + "loss": 1.3398, + "rewards/accuracies": 0.5546875, + "rewards/chosen": -1.3865143060684204, + "rewards/margins": 0.13970226049423218, + "rewards/rejected": -1.5262165069580078, + "step": 121 + }, + { + "epoch": 0.25794516022464486, + "grad_norm": 0.983686089515686, + "learning_rate": 9.267067548963974e-07, + "logits/chosen": -0.40266987681388855, + "logits/rejected": -0.37586671113967896, + "logps/chosen": -0.7362720966339111, + "logps/rejected": -0.7538987398147583, + "loss": 1.4066, + "rewards/accuracies": 0.5390625, + "rewards/chosen": -1.4725441932678223, + "rewards/margins": 0.03525342047214508, + "rewards/rejected": -1.5077974796295166, + "step": 122 + }, + { + "epoch": 0.26005946481665015, + "grad_norm": 1.0076361894607544, + "learning_rate": 9.24764037927381e-07, + "logits/chosen": -0.4461461007595062, + "logits/rejected": -0.40700826048851013, + "logps/chosen": -0.7206646800041199, + "logps/rejected": -0.7489192485809326, + "loss": 1.3759, + "rewards/accuracies": 0.5625, + "rewards/chosen": -1.4413293600082397, + "rewards/margins": 0.05650928616523743, + "rewards/rejected": -1.4978384971618652, + "step": 123 + }, + { + "epoch": 0.26217376940865544, + "grad_norm": 0.933315098285675, + "learning_rate": 9.22798001750913e-07, + "logits/chosen": -0.3966676890850067, + "logits/rejected": -0.3572196960449219, + "logps/chosen": -0.7075096368789673, + "logps/rejected": -0.7406759262084961, + "loss": 1.3667, + "rewards/accuracies": 0.6171875, + "rewards/chosen": -1.4150192737579346, + "rewards/margins": 0.06633266806602478, + "rewards/rejected": -1.4813518524169922, + "step": 124 + }, + { + "epoch": 0.26428807400066073, + "grad_norm": 0.6277392506599426, + "learning_rate": 9.20808754300814e-07, + "logits/chosen": -0.3555490970611572, + "logits/rejected": -0.35786163806915283, + "logps/chosen": -0.7549921274185181, + "logps/rejected": -0.832869291305542, + "loss": 1.3175, + "rewards/accuracies": 0.5546875, + "rewards/chosen": -1.5099842548370361, + "rewards/margins": 0.15575438737869263, + "rewards/rejected": -1.665738582611084, + "step": 125 + }, + { + "epoch": 0.266402378592666, + "grad_norm": 0.7172744274139404, + "learning_rate": 9.18796404785185e-07, + "logits/chosen": -0.41230690479278564, + "logits/rejected": -0.39935630559921265, + "logps/chosen": -0.7129833698272705, + "logps/rejected": -0.7888559103012085, + "loss": 1.3167, + "rewards/accuracies": 0.5625, + "rewards/chosen": -1.425966739654541, + "rewards/margins": 0.15174514055252075, + "rewards/rejected": -1.577711820602417, + "step": 126 + }, + { + "epoch": 0.2685166831846713, + "grad_norm": 1.2629508972167969, + "learning_rate": 9.16761063680412e-07, + "logits/chosen": -0.36754000186920166, + "logits/rejected": -0.3541562259197235, + "logps/chosen": -0.6992133855819702, + "logps/rejected": -0.7668892741203308, + "loss": 1.3735, + "rewards/accuracies": 0.578125, + "rewards/chosen": -1.3984267711639404, + "rewards/margins": 0.13535188138484955, + "rewards/rejected": -1.5337785482406616, + "step": 127 + }, + { + "epoch": 0.2706309877766766, + "grad_norm": 0.7024405598640442, + "learning_rate": 9.147028427251009e-07, + "logits/chosen": -0.4014585018157959, + "logits/rejected": -0.40560898184776306, + "logps/chosen": -0.727234959602356, + "logps/rejected": -0.8070081472396851, + "loss": 1.3138, + "rewards/accuracies": 0.625, + "rewards/chosen": -1.454469919204712, + "rewards/margins": 0.15954652428627014, + "rewards/rejected": -1.6140162944793701, + "step": 128 + }, + { + "epoch": 0.2727452923686819, + "grad_norm": 1.6173532009124756, + "learning_rate": 9.126218549139433e-07, + "logits/chosen": -0.32572367787361145, + "logits/rejected": -0.3470613956451416, + "logps/chosen": -0.7555541396141052, + "logps/rejected": -0.8856738209724426, + "loss": 1.2461, + "rewards/accuracies": 0.6796875, + "rewards/chosen": -1.5111082792282104, + "rewards/margins": 0.26023951172828674, + "rewards/rejected": -1.7713476419448853, + "step": 129 + }, + { + "epoch": 0.2748595969606871, + "grad_norm": 0.5878487229347229, + "learning_rate": 9.105182144915129e-07, + "logits/chosen": -0.39267170429229736, + "logits/rejected": -0.3448992967605591, + "logps/chosen": -0.6776289343833923, + "logps/rejected": -0.7530183792114258, + "loss": 1.3242, + "rewards/accuracies": 0.609375, + "rewards/chosen": -1.3552578687667847, + "rewards/margins": 0.15077897906303406, + "rewards/rejected": -1.5060367584228516, + "step": 130 + }, + { + "epoch": 0.2769739015526924, + "grad_norm": 0.43264809250831604, + "learning_rate": 9.08392036945994e-07, + "logits/chosen": -0.39980950951576233, + "logits/rejected": -0.4247930645942688, + "logps/chosen": -0.7898982167243958, + "logps/rejected": -0.8856299519538879, + "loss": 1.3004, + "rewards/accuracies": 0.59375, + "rewards/chosen": -1.5797964334487915, + "rewards/margins": 0.19146347045898438, + "rewards/rejected": -1.7712599039077759, + "step": 131 + }, + { + "epoch": 0.2790882061446977, + "grad_norm": 1.0348538160324097, + "learning_rate": 9.062434390028407e-07, + "logits/chosen": -0.35729700326919556, + "logits/rejected": -0.3265542984008789, + "logps/chosen": -0.7120587229728699, + "logps/rejected": -0.771691083908081, + "loss": 1.3374, + "rewards/accuracies": 0.59375, + "rewards/chosen": -1.4241174459457397, + "rewards/margins": 0.11926469206809998, + "rewards/rejected": -1.543382167816162, + "step": 132 + }, + { + "epoch": 0.281202510736703, + "grad_norm": 2.0902225971221924, + "learning_rate": 9.04072538618369e-07, + "logits/chosen": -0.4942469298839569, + "logits/rejected": -0.48699846863746643, + "logps/chosen": -0.7882512211799622, + "logps/rejected": -0.8270165920257568, + "loss": 1.3715, + "rewards/accuracies": 0.5703125, + "rewards/chosen": -1.5765024423599243, + "rewards/margins": 0.07753071188926697, + "rewards/rejected": -1.6540331840515137, + "step": 133 + }, + { + "epoch": 0.2833168153287083, + "grad_norm": 1.6436113119125366, + "learning_rate": 9.018794549732817e-07, + "logits/chosen": -0.41133156418800354, + "logits/rejected": -0.4146718382835388, + "logps/chosen": -0.779824435710907, + "logps/rejected": -0.9421006441116333, + "loss": 1.2521, + "rewards/accuracies": 0.625, + "rewards/chosen": -1.559648871421814, + "rewards/margins": 0.324552446603775, + "rewards/rejected": -1.8842012882232666, + "step": 134 + }, + { + "epoch": 0.28543111992071357, + "grad_norm": 0.8831859827041626, + "learning_rate": 8.996643084661244e-07, + "logits/chosen": -0.42452165484428406, + "logits/rejected": -0.3798604905605316, + "logps/chosen": -0.6499216556549072, + "logps/rejected": -0.7796702980995178, + "loss": 1.2581, + "rewards/accuracies": 0.625, + "rewards/chosen": -1.2998433113098145, + "rewards/margins": 0.25949734449386597, + "rewards/rejected": -1.5593405961990356, + "step": 135 + }, + { + "epoch": 0.28754542451271886, + "grad_norm": 0.8031218647956848, + "learning_rate": 8.974272207066767e-07, + "logits/chosen": -0.38131940364837646, + "logits/rejected": -0.3854255676269531, + "logps/chosen": -0.7026851773262024, + "logps/rejected": -0.762391209602356, + "loss": 1.3333, + "rewards/accuracies": 0.5625, + "rewards/chosen": -1.4053703546524048, + "rewards/margins": 0.11941206455230713, + "rewards/rejected": -1.524782419204712, + "step": 136 + }, + { + "epoch": 0.28965972910472415, + "grad_norm": 1.4455821514129639, + "learning_rate": 8.951683145092748e-07, + "logits/chosen": -0.42824965715408325, + "logits/rejected": -0.4320424795150757, + "logps/chosen": -0.7893270254135132, + "logps/rejected": -0.8517144322395325, + "loss": 1.3652, + "rewards/accuracies": 0.5234375, + "rewards/chosen": -1.5786540508270264, + "rewards/margins": 0.12477481365203857, + "rewards/rejected": -1.703428864479065, + "step": 137 + }, + { + "epoch": 0.29177403369672944, + "grad_norm": 0.6299450397491455, + "learning_rate": 8.928877138860706e-07, + "logits/chosen": -0.4388589560985565, + "logits/rejected": -0.40156903862953186, + "logps/chosen": -0.7346572875976562, + "logps/rejected": -0.8166492581367493, + "loss": 1.3134, + "rewards/accuracies": 0.5703125, + "rewards/chosen": -1.4693145751953125, + "rewards/margins": 0.16398391127586365, + "rewards/rejected": -1.6332985162734985, + "step": 138 + }, + { + "epoch": 0.29388833828873473, + "grad_norm": 2.784437417984009, + "learning_rate": 8.905855440402224e-07, + "logits/chosen": -0.405662477016449, + "logits/rejected": -0.35549795627593994, + "logps/chosen": -0.7482771277427673, + "logps/rejected": -0.795568585395813, + "loss": 1.3656, + "rewards/accuracies": 0.578125, + "rewards/chosen": -1.4965542554855347, + "rewards/margins": 0.09458285570144653, + "rewards/rejected": -1.591137170791626, + "step": 139 + }, + { + "epoch": 0.29600264288074, + "grad_norm": 0.4958692193031311, + "learning_rate": 8.882619313590212e-07, + "logits/chosen": -0.3814452886581421, + "logits/rejected": -0.35715553164482117, + "logps/chosen": -0.7731542587280273, + "logps/rejected": -0.8285202980041504, + "loss": 1.3776, + "rewards/accuracies": 0.515625, + "rewards/chosen": -1.5463085174560547, + "rewards/margins": 0.11073210835456848, + "rewards/rejected": -1.6570405960083008, + "step": 140 + }, + { + "epoch": 0.2981169474727453, + "grad_norm": 0.4597362279891968, + "learning_rate": 8.859170034069532e-07, + "logits/chosen": -0.388383150100708, + "logits/rejected": -0.4071737229824066, + "logps/chosen": -0.7263504266738892, + "logps/rejected": -0.769676148891449, + "loss": 1.3712, + "rewards/accuracies": 0.53125, + "rewards/chosen": -1.4527008533477783, + "rewards/margins": 0.08665145933628082, + "rewards/rejected": -1.539352297782898, + "step": 141 + }, + { + "epoch": 0.3002312520647506, + "grad_norm": 0.4914930760860443, + "learning_rate": 8.835508889186956e-07, + "logits/chosen": -0.41084378957748413, + "logits/rejected": -0.3823031187057495, + "logps/chosen": -0.7565821409225464, + "logps/rejected": -0.9084322452545166, + "loss": 1.2717, + "rewards/accuracies": 0.578125, + "rewards/chosen": -1.5131642818450928, + "rewards/margins": 0.3037002384662628, + "rewards/rejected": -1.8168644905090332, + "step": 142 + }, + { + "epoch": 0.3023455566567559, + "grad_norm": 2.0075581073760986, + "learning_rate": 8.811637177920499e-07, + "logits/chosen": -0.4438302516937256, + "logits/rejected": -0.4916025698184967, + "logps/chosen": -0.800719141960144, + "logps/rejected": -0.8658267855644226, + "loss": 1.358, + "rewards/accuracies": 0.515625, + "rewards/chosen": -1.601438283920288, + "rewards/margins": 0.1302153617143631, + "rewards/rejected": -1.7316535711288452, + "step": 143 + }, + { + "epoch": 0.3044598612487612, + "grad_norm": 1.1243022680282593, + "learning_rate": 8.7875562108081e-07, + "logits/chosen": -0.40519949793815613, + "logits/rejected": -0.3905750811100006, + "logps/chosen": -0.689585268497467, + "logps/rejected": -0.7312421798706055, + "loss": 1.3503, + "rewards/accuracies": 0.5703125, + "rewards/chosen": -1.379170536994934, + "rewards/margins": 0.08331384509801865, + "rewards/rejected": -1.462484359741211, + "step": 144 + }, + { + "epoch": 0.3065741658407664, + "grad_norm": 0.7543137669563293, + "learning_rate": 8.76326730987568e-07, + "logits/chosen": -0.4696752727031708, + "logits/rejected": -0.4357326626777649, + "logps/chosen": -0.7813425660133362, + "logps/rejected": -0.8276973962783813, + "loss": 1.3794, + "rewards/accuracies": 0.5234375, + "rewards/chosen": -1.5626851320266724, + "rewards/margins": 0.09270970523357391, + "rewards/rejected": -1.6553947925567627, + "step": 145 + }, + { + "epoch": 0.3086884704327717, + "grad_norm": 1.3136053085327148, + "learning_rate": 8.738771808564555e-07, + "logits/chosen": -0.4262731075286865, + "logits/rejected": -0.44038820266723633, + "logps/chosen": -0.697494387626648, + "logps/rejected": -0.8369535803794861, + "loss": 1.2699, + "rewards/accuracies": 0.609375, + "rewards/chosen": -1.394988775253296, + "rewards/margins": 0.2789183557033539, + "rewards/rejected": -1.6739071607589722, + "step": 146 + }, + { + "epoch": 0.310802775024777, + "grad_norm": 2.221562385559082, + "learning_rate": 8.714071051658245e-07, + "logits/chosen": -0.40089336037635803, + "logits/rejected": -0.37991875410079956, + "logps/chosen": -0.7704445123672485, + "logps/rejected": -0.859091579914093, + "loss": 1.2987, + "rewards/accuracies": 0.5859375, + "rewards/chosen": -1.540889024734497, + "rewards/margins": 0.17729414999485016, + "rewards/rejected": -1.718183159828186, + "step": 147 + }, + { + "epoch": 0.3129170796167823, + "grad_norm": 1.5049912929534912, + "learning_rate": 8.689166395208636e-07, + "logits/chosen": -0.38984015583992004, + "logits/rejected": -0.35900723934173584, + "logps/chosen": -0.6424779891967773, + "logps/rejected": -0.7145389318466187, + "loss": 1.3261, + "rewards/accuracies": 0.5703125, + "rewards/chosen": -1.2849559783935547, + "rewards/margins": 0.14412200450897217, + "rewards/rejected": -1.4290778636932373, + "step": 148 + }, + { + "epoch": 0.31503138420878757, + "grad_norm": 0.36125388741493225, + "learning_rate": 8.664059206461534e-07, + "logits/chosen": -0.3490441143512726, + "logits/rejected": -0.3219914436340332, + "logps/chosen": -0.7200264930725098, + "logps/rejected": -0.7924249768257141, + "loss": 1.3476, + "rewards/accuracies": 0.5625, + "rewards/chosen": -1.4400529861450195, + "rewards/margins": 0.1447969526052475, + "rewards/rejected": -1.5848499536514282, + "step": 149 + }, + { + "epoch": 0.31714568880079286, + "grad_norm": 1.039840579032898, + "learning_rate": 8.638750863781612e-07, + "logits/chosen": -0.40701645612716675, + "logits/rejected": -0.406186580657959, + "logps/chosen": -0.7083575129508972, + "logps/rejected": -0.7766748070716858, + "loss": 1.3263, + "rewards/accuracies": 0.5390625, + "rewards/chosen": -1.4167150259017944, + "rewards/margins": 0.1366347074508667, + "rewards/rejected": -1.5533496141433716, + "step": 150 + }, + { + "epoch": 0.31925999339279815, + "grad_norm": 0.7128564119338989, + "learning_rate": 8.613242756576728e-07, + "logits/chosen": -0.40932926535606384, + "logits/rejected": -0.4234562814235687, + "logps/chosen": -0.6775843501091003, + "logps/rejected": -0.7866222858428955, + "loss": 1.2834, + "rewards/accuracies": 0.6015625, + "rewards/chosen": -1.3551687002182007, + "rewards/margins": 0.2180757373571396, + "rewards/rejected": -1.573244571685791, + "step": 151 + }, + { + "epoch": 0.32137429798480344, + "grad_norm": 1.1701059341430664, + "learning_rate": 8.587536285221655e-07, + "logits/chosen": -0.3654797077178955, + "logits/rejected": -0.3181680738925934, + "logps/chosen": -0.6686022877693176, + "logps/rejected": -0.7058504223823547, + "loss": 1.3612, + "rewards/accuracies": 0.546875, + "rewards/chosen": -1.3372045755386353, + "rewards/margins": 0.07449636608362198, + "rewards/rejected": -1.4117008447647095, + "step": 152 + }, + { + "epoch": 0.3234886025768087, + "grad_norm": 0.8239700794219971, + "learning_rate": 8.561632860981204e-07, + "logits/chosen": -0.42527130246162415, + "logits/rejected": -0.4091627299785614, + "logps/chosen": -0.6969794631004333, + "logps/rejected": -0.8019355535507202, + "loss": 1.2974, + "rewards/accuracies": 0.578125, + "rewards/chosen": -1.3939589262008667, + "rewards/margins": 0.20991206169128418, + "rewards/rejected": -1.6038711071014404, + "step": 153 + }, + { + "epoch": 0.325602907168814, + "grad_norm": 1.4885636568069458, + "learning_rate": 8.535533905932737e-07, + "logits/chosen": -0.4126192331314087, + "logits/rejected": -0.41548141837120056, + "logps/chosen": -0.7076549530029297, + "logps/rejected": -0.7940821051597595, + "loss": 1.3198, + "rewards/accuracies": 0.625, + "rewards/chosen": -1.4153099060058594, + "rewards/margins": 0.17285437881946564, + "rewards/rejected": -1.588164210319519, + "step": 154 + }, + { + "epoch": 0.3277172117608193, + "grad_norm": 1.439434289932251, + "learning_rate": 8.509240852888106e-07, + "logits/chosen": -0.3763914704322815, + "logits/rejected": -0.3617165684700012, + "logps/chosen": -0.7189474105834961, + "logps/rejected": -0.827629804611206, + "loss": 1.2816, + "rewards/accuracies": 0.625, + "rewards/chosen": -1.4378948211669922, + "rewards/margins": 0.2173648476600647, + "rewards/rejected": -1.655259609222412, + "step": 155 + }, + { + "epoch": 0.3298315163528246, + "grad_norm": 1.4505418539047241, + "learning_rate": 8.482755145314985e-07, + "logits/chosen": -0.37879478931427, + "logits/rejected": -0.38689684867858887, + "logps/chosen": -0.7011865973472595, + "logps/rejected": -0.8019431829452515, + "loss": 1.3158, + "rewards/accuracies": 0.5546875, + "rewards/chosen": -1.402373194694519, + "rewards/margins": 0.2015131413936615, + "rewards/rejected": -1.603886365890503, + "step": 156 + }, + { + "epoch": 0.3319458209448299, + "grad_norm": 2.0968713760375977, + "learning_rate": 8.45607823725763e-07, + "logits/chosen": -0.4366365075111389, + "logits/rejected": -0.41210681200027466, + "logps/chosen": -0.6455651521682739, + "logps/rejected": -0.7228428721427917, + "loss": 1.3247, + "rewards/accuracies": 0.5546875, + "rewards/chosen": -1.2911303043365479, + "rewards/margins": 0.1545555144548416, + "rewards/rejected": -1.4456857442855835, + "step": 157 + }, + { + "epoch": 0.3340601255368352, + "grad_norm": 0.6716106534004211, + "learning_rate": 8.429211593257052e-07, + "logits/chosen": -0.42992207407951355, + "logits/rejected": -0.4105672836303711, + "logps/chosen": -0.6981461048126221, + "logps/rejected": -0.7909567952156067, + "loss": 1.3128, + "rewards/accuracies": 0.5859375, + "rewards/chosen": -1.3962922096252441, + "rewards/margins": 0.1856214702129364, + "rewards/rejected": -1.5819135904312134, + "step": 158 + }, + { + "epoch": 0.33617443012884046, + "grad_norm": 2.4430501461029053, + "learning_rate": 8.402156688270612e-07, + "logits/chosen": -0.4184916317462921, + "logits/rejected": -0.3943992257118225, + "logps/chosen": -0.6568948030471802, + "logps/rejected": -0.7506390810012817, + "loss": 1.2992, + "rewards/accuracies": 0.5859375, + "rewards/chosen": -1.3137896060943604, + "rewards/margins": 0.18748846650123596, + "rewards/rejected": -1.5012781620025635, + "step": 159 + }, + { + "epoch": 0.3382887347208457, + "grad_norm": 2.0322091579437256, + "learning_rate": 8.374915007591052e-07, + "logits/chosen": -0.4713057577610016, + "logits/rejected": -0.42163771390914917, + "logps/chosen": -0.7347853779792786, + "logps/rejected": -0.7770044207572937, + "loss": 1.3801, + "rewards/accuracies": 0.5078125, + "rewards/chosen": -1.4695707559585571, + "rewards/margins": 0.0844380110502243, + "rewards/rejected": -1.5540088415145874, + "step": 160 + }, + { + "epoch": 0.340403039312851, + "grad_norm": 0.4045500159263611, + "learning_rate": 8.347488046764948e-07, + "logits/chosen": -0.39465126395225525, + "logits/rejected": -0.3961923122406006, + "logps/chosen": -0.601732075214386, + "logps/rejected": -0.694148600101471, + "loss": 1.2859, + "rewards/accuracies": 0.6328125, + "rewards/chosen": -1.203464150428772, + "rewards/margins": 0.18483319878578186, + "rewards/rejected": -1.388297200202942, + "step": 161 + }, + { + "epoch": 0.3425173439048563, + "grad_norm": 2.79396915435791, + "learning_rate": 8.319877311510612e-07, + "logits/chosen": -0.4311378002166748, + "logits/rejected": -0.4248836636543274, + "logps/chosen": -0.6813413500785828, + "logps/rejected": -0.775830864906311, + "loss": 1.3001, + "rewards/accuracies": 0.578125, + "rewards/chosen": -1.3626827001571655, + "rewards/margins": 0.18897925317287445, + "rewards/rejected": -1.551661729812622, + "step": 162 + }, + { + "epoch": 0.34463164849686156, + "grad_norm": 0.714146077632904, + "learning_rate": 8.292084317635419e-07, + "logits/chosen": -0.4060715436935425, + "logits/rejected": -0.3770482540130615, + "logps/chosen": -0.7176523208618164, + "logps/rejected": -0.7973593473434448, + "loss": 1.324, + "rewards/accuracies": 0.5859375, + "rewards/chosen": -1.4353046417236328, + "rewards/margins": 0.15941408276557922, + "rewards/rejected": -1.5947186946868896, + "step": 163 + }, + { + "epoch": 0.34674595308886685, + "grad_norm": 1.6007037162780762, + "learning_rate": 8.264110590952607e-07, + "logits/chosen": -0.49063974618911743, + "logits/rejected": -0.5119628310203552, + "logps/chosen": -0.7263911366462708, + "logps/rejected": -0.9138184785842896, + "loss": 1.2439, + "rewards/accuracies": 0.6484375, + "rewards/chosen": -1.4527822732925415, + "rewards/margins": 0.3748546540737152, + "rewards/rejected": -1.827636957168579, + "step": 164 + }, + { + "epoch": 0.34886025768087214, + "grad_norm": 1.4566830396652222, + "learning_rate": 8.235957667197494e-07, + "logits/chosen": -0.4681779146194458, + "logits/rejected": -0.46475380659103394, + "logps/chosen": -0.6923782229423523, + "logps/rejected": -0.7901281118392944, + "loss": 1.295, + "rewards/accuracies": 0.5703125, + "rewards/chosen": -1.3847564458847046, + "rewards/margins": 0.19549959897994995, + "rewards/rejected": -1.5802562236785889, + "step": 165 + }, + { + "epoch": 0.35097456227287743, + "grad_norm": 3.0825328826904297, + "learning_rate": 8.207627091943177e-07, + "logits/chosen": -0.4294862151145935, + "logits/rejected": -0.42411237955093384, + "logps/chosen": -0.6851246356964111, + "logps/rejected": -0.7844961881637573, + "loss": 1.2871, + "rewards/accuracies": 0.625, + "rewards/chosen": -1.3702492713928223, + "rewards/margins": 0.19874317944049835, + "rewards/rejected": -1.5689923763275146, + "step": 166 + }, + { + "epoch": 0.3530888668648827, + "grad_norm": 1.0783339738845825, + "learning_rate": 8.179120420515675e-07, + "logits/chosen": -0.4528030455112457, + "logits/rejected": -0.4626815617084503, + "logps/chosen": -0.703376293182373, + "logps/rejected": -0.8752757906913757, + "loss": 1.2193, + "rewards/accuracies": 0.703125, + "rewards/chosen": -1.406752586364746, + "rewards/margins": 0.34379899501800537, + "rewards/rejected": -1.7505515813827515, + "step": 167 + }, + { + "epoch": 0.355203171456888, + "grad_norm": 2.6788036823272705, + "learning_rate": 8.150439217908556e-07, + "logits/chosen": -0.44946759939193726, + "logits/rejected": -0.47430264949798584, + "logps/chosen": -0.751136839389801, + "logps/rejected": -0.874577522277832, + "loss": 1.29, + "rewards/accuracies": 0.578125, + "rewards/chosen": -1.502273678779602, + "rewards/margins": 0.24688144028186798, + "rewards/rejected": -1.749155044555664, + "step": 168 + }, + { + "epoch": 0.3573174760488933, + "grad_norm": 0.9087730050086975, + "learning_rate": 8.121585058696999e-07, + "logits/chosen": -0.47294262051582336, + "logits/rejected": -0.46765226125717163, + "logps/chosen": -0.7291173934936523, + "logps/rejected": -0.7999277114868164, + "loss": 1.3482, + "rewards/accuracies": 0.6015625, + "rewards/chosen": -1.4582347869873047, + "rewards/margins": 0.1416206806898117, + "rewards/rejected": -1.5998554229736328, + "step": 169 + }, + { + "epoch": 0.3594317806408986, + "grad_norm": 3.392674207687378, + "learning_rate": 8.092559526951374e-07, + "logits/chosen": -0.5026620626449585, + "logits/rejected": -0.46620574593544006, + "logps/chosen": -0.746992290019989, + "logps/rejected": -0.8266301155090332, + "loss": 1.3202, + "rewards/accuracies": 0.6328125, + "rewards/chosen": -1.493984580039978, + "rewards/margins": 0.15927578508853912, + "rewards/rejected": -1.6532602310180664, + "step": 170 + }, + { + "epoch": 0.3615460852329039, + "grad_norm": 1.27628755569458, + "learning_rate": 8.063364216150256e-07, + "logits/chosen": -0.5211395025253296, + "logits/rejected": -0.5419963598251343, + "logps/chosen": -0.7919114828109741, + "logps/rejected": -0.8731362223625183, + "loss": 1.3228, + "rewards/accuracies": 0.5703125, + "rewards/chosen": -1.5838229656219482, + "rewards/margins": 0.16244953870773315, + "rewards/rejected": -1.7462724447250366, + "step": 171 + }, + { + "epoch": 0.36366038982490917, + "grad_norm": 0.8269656896591187, + "learning_rate": 8.034000729092967e-07, + "logits/chosen": -0.49545183777809143, + "logits/rejected": -0.4716613292694092, + "logps/chosen": -0.719520092010498, + "logps/rejected": -0.7876347303390503, + "loss": 1.3367, + "rewards/accuracies": 0.5625, + "rewards/chosen": -1.439040184020996, + "rewards/margins": 0.13622930645942688, + "rewards/rejected": -1.5752694606781006, + "step": 172 + }, + { + "epoch": 0.36577469441691446, + "grad_norm": 0.6049383282661438, + "learning_rate": 8.004470677811559e-07, + "logits/chosen": -0.45276379585266113, + "logits/rejected": -0.42617955803871155, + "logps/chosen": -0.7097947597503662, + "logps/rejected": -0.7606989145278931, + "loss": 1.3909, + "rewards/accuracies": 0.5703125, + "rewards/chosen": -1.4195895195007324, + "rewards/margins": 0.10180822014808655, + "rewards/rejected": -1.5213978290557861, + "step": 173 + }, + { + "epoch": 0.36788899900891975, + "grad_norm": 3.980013847351074, + "learning_rate": 7.974775683482337e-07, + "logits/chosen": -0.4783569574356079, + "logits/rejected": -0.43521156907081604, + "logps/chosen": -0.7623491287231445, + "logps/rejected": -0.8719285130500793, + "loss": 1.2838, + "rewards/accuracies": 0.5859375, + "rewards/chosen": -1.524698257446289, + "rewards/margins": 0.2191585898399353, + "rewards/rejected": -1.7438570261001587, + "step": 174 + }, + { + "epoch": 0.370003303600925, + "grad_norm": 1.024530053138733, + "learning_rate": 7.94491737633684e-07, + "logits/chosen": -0.5009916424751282, + "logits/rejected": -0.48874592781066895, + "logps/chosen": -0.7552992701530457, + "logps/rejected": -0.8485872745513916, + "loss": 1.3153, + "rewards/accuracies": 0.6015625, + "rewards/chosen": -1.5105985403060913, + "rewards/margins": 0.18657605350017548, + "rewards/rejected": -1.6971745491027832, + "step": 175 + }, + { + "epoch": 0.37211760819293027, + "grad_norm": 1.5952919721603394, + "learning_rate": 7.91489739557236e-07, + "logits/chosen": -0.4424138069152832, + "logits/rejected": -0.4334307312965393, + "logps/chosen": -0.6956002116203308, + "logps/rejected": -0.8018803000450134, + "loss": 1.3011, + "rewards/accuracies": 0.59375, + "rewards/chosen": -1.3912004232406616, + "rewards/margins": 0.21256020665168762, + "rewards/rejected": -1.6037606000900269, + "step": 176 + }, + { + "epoch": 0.37423191278493556, + "grad_norm": 1.8331164121627808, + "learning_rate": 7.884717389261934e-07, + "logits/chosen": -0.4836267828941345, + "logits/rejected": -0.5018677115440369, + "logps/chosen": -0.7895969152450562, + "logps/rejected": -0.927432656288147, + "loss": 1.2467, + "rewards/accuracies": 0.625, + "rewards/chosen": -1.5791938304901123, + "rewards/margins": 0.27567166090011597, + "rewards/rejected": -1.854865312576294, + "step": 177 + }, + { + "epoch": 0.37634621737694085, + "grad_norm": 2.165984869003296, + "learning_rate": 7.854379014263876e-07, + "logits/chosen": -0.46125832200050354, + "logits/rejected": -0.39802712202072144, + "logps/chosen": -0.8382925391197205, + "logps/rejected": -0.9422982931137085, + "loss": 1.339, + "rewards/accuracies": 0.5546875, + "rewards/chosen": -1.676585078239441, + "rewards/margins": 0.20801125466823578, + "rewards/rejected": -1.884596586227417, + "step": 178 + }, + { + "epoch": 0.37846052196894614, + "grad_norm": 0.522197425365448, + "learning_rate": 7.823883936130817e-07, + "logits/chosen": -0.4747823476791382, + "logits/rejected": -0.4888593554496765, + "logps/chosen": -0.723059892654419, + "logps/rejected": -0.84626305103302, + "loss": 1.2708, + "rewards/accuracies": 0.6171875, + "rewards/chosen": -1.446119785308838, + "rewards/margins": 0.24640652537345886, + "rewards/rejected": -1.69252610206604, + "step": 179 + }, + { + "epoch": 0.38057482656095143, + "grad_norm": 1.9690748453140259, + "learning_rate": 7.793233829018262e-07, + "logits/chosen": -0.5430271625518799, + "logits/rejected": -0.5403288006782532, + "logps/chosen": -0.8244275450706482, + "logps/rejected": -0.9133931994438171, + "loss": 1.3306, + "rewards/accuracies": 0.5, + "rewards/chosen": -1.6488550901412964, + "rewards/margins": 0.17793115973472595, + "rewards/rejected": -1.8267863988876343, + "step": 180 + }, + { + "epoch": 0.3826891311529567, + "grad_norm": 2.9181363582611084, + "learning_rate": 7.762430375592688e-07, + "logits/chosen": -0.4843495786190033, + "logits/rejected": -0.47929176688194275, + "logps/chosen": -0.8097372055053711, + "logps/rejected": -0.8973760008811951, + "loss": 1.3283, + "rewards/accuracies": 0.5625, + "rewards/chosen": -1.6194744110107422, + "rewards/margins": 0.17527759075164795, + "rewards/rejected": -1.7947520017623901, + "step": 181 + }, + { + "epoch": 0.384803435744962, + "grad_norm": 4.227083683013916, + "learning_rate": 7.731475266939158e-07, + "logits/chosen": -0.5047686696052551, + "logits/rejected": -0.4921850264072418, + "logps/chosen": -0.875984787940979, + "logps/rejected": -1.0406755208969116, + "loss": 1.3169, + "rewards/accuracies": 0.5703125, + "rewards/chosen": -1.751969575881958, + "rewards/margins": 0.32938146591186523, + "rewards/rejected": -2.0813510417938232, + "step": 182 + }, + { + "epoch": 0.3869177403369673, + "grad_norm": 1.2871490716934204, + "learning_rate": 7.700370202468489e-07, + "logits/chosen": -0.5123783349990845, + "logits/rejected": -0.55179762840271, + "logps/chosen": -0.8869211077690125, + "logps/rejected": -1.1082773208618164, + "loss": 1.216, + "rewards/accuracies": 0.640625, + "rewards/chosen": -1.773842215538025, + "rewards/margins": 0.4427123963832855, + "rewards/rejected": -2.216554641723633, + "step": 183 + }, + { + "epoch": 0.3890320449289726, + "grad_norm": 1.3015679121017456, + "learning_rate": 7.669116889823954e-07, + "logits/chosen": -0.49182361364364624, + "logits/rejected": -0.5180585384368896, + "logps/chosen": -0.8816227912902832, + "logps/rejected": -0.9516821503639221, + "loss": 1.3449, + "rewards/accuracies": 0.5078125, + "rewards/chosen": -1.7632455825805664, + "rewards/margins": 0.14011862874031067, + "rewards/rejected": -1.9033643007278442, + "step": 184 + }, + { + "epoch": 0.3911463495209779, + "grad_norm": 4.280956268310547, + "learning_rate": 7.637717044787526e-07, + "logits/chosen": -0.5702117681503296, + "logits/rejected": -0.5475804209709167, + "logps/chosen": -0.9307697415351868, + "logps/rejected": -1.0322346687316895, + "loss": 1.3434, + "rewards/accuracies": 0.5546875, + "rewards/chosen": -1.8615394830703735, + "rewards/margins": 0.20292985439300537, + "rewards/rejected": -2.064469337463379, + "step": 185 + }, + { + "epoch": 0.39326065411298317, + "grad_norm": 1.3511455059051514, + "learning_rate": 7.606172391185699e-07, + "logits/chosen": -0.5466108322143555, + "logits/rejected": -0.551085352897644, + "logps/chosen": -1.0657893419265747, + "logps/rejected": -1.15786612033844, + "loss": 1.3549, + "rewards/accuracies": 0.4609375, + "rewards/chosen": -2.1315786838531494, + "rewards/margins": 0.18415334820747375, + "rewards/rejected": -2.31573224067688, + "step": 186 + }, + { + "epoch": 0.39537495870498846, + "grad_norm": 0.7001176476478577, + "learning_rate": 7.574484660794836e-07, + "logits/chosen": -0.4849010407924652, + "logits/rejected": -0.5057946443557739, + "logps/chosen": -1.0784757137298584, + "logps/rejected": -1.2035218477249146, + "loss": 1.3556, + "rewards/accuracies": 0.4921875, + "rewards/chosen": -2.156951427459717, + "rewards/margins": 0.25009211897850037, + "rewards/rejected": -2.407043695449829, + "step": 187 + }, + { + "epoch": 0.39748926329699374, + "grad_norm": 3.1405649185180664, + "learning_rate": 7.542655593246103e-07, + "logits/chosen": -0.5316596031188965, + "logits/rejected": -0.5658366680145264, + "logps/chosen": -1.0630009174346924, + "logps/rejected": -1.2867177724838257, + "loss": 1.2612, + "rewards/accuracies": 0.5703125, + "rewards/chosen": -2.1260018348693848, + "rewards/margins": 0.447433739900589, + "rewards/rejected": -2.5734355449676514, + "step": 188 + }, + { + "epoch": 0.39960356788899903, + "grad_norm": 2.142986297607422, + "learning_rate": 7.510686935929962e-07, + "logits/chosen": -0.5959028005599976, + "logits/rejected": -0.5836039781570435, + "logps/chosen": -1.111003041267395, + "logps/rejected": -1.1858208179473877, + "loss": 1.3958, + "rewards/accuracies": 0.5390625, + "rewards/chosen": -2.22200608253479, + "rewards/margins": 0.149635449051857, + "rewards/rejected": -2.3716416358947754, + "step": 189 + }, + { + "epoch": 0.40171787248100427, + "grad_norm": 1.9227335453033447, + "learning_rate": 7.478580443900246e-07, + "logits/chosen": -0.607532799243927, + "logits/rejected": -0.6102017760276794, + "logps/chosen": -1.3353261947631836, + "logps/rejected": -1.3975369930267334, + "loss": 1.457, + "rewards/accuracies": 0.5078125, + "rewards/chosen": -2.670652389526367, + "rewards/margins": 0.12442154437303543, + "rewards/rejected": -2.795073986053467, + "step": 190 + }, + { + "epoch": 0.40383217707300956, + "grad_norm": 0.8509105443954468, + "learning_rate": 7.446337879777802e-07, + "logits/chosen": -0.5903070569038391, + "logits/rejected": -0.5728173851966858, + "logps/chosen": -1.27094566822052, + "logps/rejected": -1.3024815320968628, + "loss": 1.4953, + "rewards/accuracies": 0.5, + "rewards/chosen": -2.54189133644104, + "rewards/margins": 0.06307169049978256, + "rewards/rejected": -2.6049630641937256, + "step": 191 + }, + { + "epoch": 0.40594648166501485, + "grad_norm": 1.1561088562011719, + "learning_rate": 7.413961013653725e-07, + "logits/chosen": -0.5578102469444275, + "logits/rejected": -0.5907329320907593, + "logps/chosen": -1.3817013502120972, + "logps/rejected": -1.419295072555542, + "loss": 1.4865, + "rewards/accuracies": 0.515625, + "rewards/chosen": -2.7634027004241943, + "rewards/margins": 0.07518734782934189, + "rewards/rejected": -2.838590145111084, + "step": 192 + } + ], + "logging_steps": 1, + "max_steps": 472, + "num_input_tokens_seen": 0, + "num_train_epochs": 1, + "save_steps": 64, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 0.0, + "train_batch_size": 2, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-256/config.json b/checkpoint-256/config.json new file mode 100644 index 0000000000000000000000000000000000000000..ead276a9e0bbd4a0baae360715d8e80853974eb6 --- /dev/null +++ b/checkpoint-256/config.json @@ -0,0 +1,40 @@ +{ + "_name_or_path": "/home/v-liuzichuan/huggingface/Llama-3.2-3B-Instruct", + "architectures": [ + "LlamaForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "bos_token_id": 128000, + "eos_token_id": [ + 128001, + 128008, + 128009 + ], + "head_dim": 128, + "hidden_act": "silu", + "hidden_size": 3072, + "initializer_range": 0.02, + "intermediate_size": 8192, + "max_position_embeddings": 131072, + "mlp_bias": false, + "model_type": "llama", + "num_attention_heads": 24, + "num_hidden_layers": 28, + "num_key_value_heads": 8, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": { + "factor": 32.0, + "high_freq_factor": 4.0, + "low_freq_factor": 1.0, + "original_max_position_embeddings": 8192, + "rope_type": "llama3" + }, + "rope_theta": 500000.0, + "tie_word_embeddings": true, + "torch_dtype": "bfloat16", + "transformers_version": "4.45.2", + "use_cache": false, + "vocab_size": 128256 +} diff --git a/checkpoint-256/generation_config.json b/checkpoint-256/generation_config.json new file mode 100644 index 0000000000000000000000000000000000000000..b18d16b4cc3c799dd78e854fd824ddf8a2996285 --- /dev/null +++ b/checkpoint-256/generation_config.json @@ -0,0 +1,12 @@ +{ + "bos_token_id": 128000, + "do_sample": true, + "eos_token_id": [ + 128001, + 128008, + 128009 + ], + "temperature": 0.6, + "top_p": 0.9, + "transformers_version": "4.45.2" +} diff --git a/checkpoint-256/model-00001-of-00002.safetensors b/checkpoint-256/model-00001-of-00002.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..6e958b7fe10260654b1f3ca8a192a801185817b4 --- /dev/null +++ b/checkpoint-256/model-00001-of-00002.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ecb726c93cf160c9dd4347fb391f991bf0062910f272422eb8b8bf8e4c73f0b0 +size 4965799096 diff --git a/checkpoint-256/model-00002-of-00002.safetensors b/checkpoint-256/model-00002-of-00002.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..8d8ec98ff46a1146b3e54570c840d0897c19a08e --- /dev/null +++ b/checkpoint-256/model-00002-of-00002.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7051a465dcd77fefbffa73b5b90ba8dbe6ebf778ff3340ce9ce5af4ec2d662e8 +size 2247734992 diff --git a/checkpoint-256/model.safetensors.index.json b/checkpoint-256/model.safetensors.index.json new file mode 100644 index 0000000000000000000000000000000000000000..ed64de846d720b9a7859dc20575fea8e8ca51940 --- /dev/null +++ b/checkpoint-256/model.safetensors.index.json @@ -0,0 +1,262 @@ +{ + "metadata": { + "total_size": 7213504512 + }, + "weight_map": { + "lm_head.weight": "model-00002-of-00002.safetensors", + "model.embed_tokens.weight": "model-00001-of-00002.safetensors", + "model.layers.0.input_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.0.mlp.down_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.0.mlp.gate_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.0.mlp.up_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.0.post_attention_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.0.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.0.self_attn.o_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.0.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.0.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.1.input_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.1.mlp.down_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.1.mlp.gate_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.1.mlp.up_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.1.post_attention_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.1.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.1.self_attn.o_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.1.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.1.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.10.input_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.10.mlp.down_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.10.mlp.gate_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.10.mlp.up_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.10.post_attention_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.10.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.10.self_attn.o_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.10.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.10.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.11.input_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.11.mlp.down_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.11.mlp.gate_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.11.mlp.up_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.11.post_attention_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.11.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.11.self_attn.o_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.11.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.11.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.12.input_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.12.mlp.down_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.12.mlp.gate_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.12.mlp.up_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.12.post_attention_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.12.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.12.self_attn.o_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.12.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.12.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.13.input_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.13.mlp.down_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.13.mlp.gate_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.13.mlp.up_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.13.post_attention_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.13.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.13.self_attn.o_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.13.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.13.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.14.input_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.14.mlp.down_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.14.mlp.gate_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.14.mlp.up_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.14.post_attention_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.14.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.14.self_attn.o_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.14.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.14.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.15.input_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.15.mlp.down_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.15.mlp.gate_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.15.mlp.up_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.15.post_attention_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.15.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.15.self_attn.o_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.15.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.15.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.16.input_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.16.mlp.down_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.16.mlp.gate_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.16.mlp.up_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.16.post_attention_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.16.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.16.self_attn.o_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.16.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.16.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.17.input_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.17.mlp.down_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.17.mlp.gate_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.17.mlp.up_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.17.post_attention_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.17.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.17.self_attn.o_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.17.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.17.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.18.input_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.18.mlp.down_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.18.mlp.gate_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.18.mlp.up_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.18.post_attention_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.18.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.18.self_attn.o_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.18.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.18.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.19.input_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.19.mlp.down_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.19.mlp.gate_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.19.mlp.up_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.19.post_attention_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.19.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.19.self_attn.o_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.19.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.19.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.2.input_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.2.mlp.down_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.2.mlp.gate_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.2.mlp.up_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.2.post_attention_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.2.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.2.self_attn.o_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.2.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.2.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.20.input_layernorm.weight": "model-00002-of-00002.safetensors", + "model.layers.20.mlp.down_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.20.mlp.gate_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.20.mlp.up_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.20.post_attention_layernorm.weight": "model-00002-of-00002.safetensors", + "model.layers.20.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.20.self_attn.o_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.20.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.20.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.21.input_layernorm.weight": "model-00002-of-00002.safetensors", + "model.layers.21.mlp.down_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.21.mlp.gate_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.21.mlp.up_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.21.post_attention_layernorm.weight": "model-00002-of-00002.safetensors", + "model.layers.21.self_attn.k_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.21.self_attn.o_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.21.self_attn.q_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.21.self_attn.v_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.22.input_layernorm.weight": "model-00002-of-00002.safetensors", + "model.layers.22.mlp.down_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.22.mlp.gate_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.22.mlp.up_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.22.post_attention_layernorm.weight": "model-00002-of-00002.safetensors", + "model.layers.22.self_attn.k_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.22.self_attn.o_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.22.self_attn.q_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.22.self_attn.v_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.23.input_layernorm.weight": "model-00002-of-00002.safetensors", + "model.layers.23.mlp.down_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.23.mlp.gate_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.23.mlp.up_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.23.post_attention_layernorm.weight": "model-00002-of-00002.safetensors", + "model.layers.23.self_attn.k_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.23.self_attn.o_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.23.self_attn.q_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.23.self_attn.v_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.24.input_layernorm.weight": "model-00002-of-00002.safetensors", + "model.layers.24.mlp.down_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.24.mlp.gate_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.24.mlp.up_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.24.post_attention_layernorm.weight": "model-00002-of-00002.safetensors", + "model.layers.24.self_attn.k_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.24.self_attn.o_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.24.self_attn.q_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.24.self_attn.v_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.25.input_layernorm.weight": "model-00002-of-00002.safetensors", + "model.layers.25.mlp.down_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.25.mlp.gate_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.25.mlp.up_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.25.post_attention_layernorm.weight": "model-00002-of-00002.safetensors", + "model.layers.25.self_attn.k_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.25.self_attn.o_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.25.self_attn.q_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.25.self_attn.v_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.26.input_layernorm.weight": "model-00002-of-00002.safetensors", + "model.layers.26.mlp.down_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.26.mlp.gate_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.26.mlp.up_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.26.post_attention_layernorm.weight": "model-00002-of-00002.safetensors", + "model.layers.26.self_attn.k_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.26.self_attn.o_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.26.self_attn.q_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.26.self_attn.v_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.27.input_layernorm.weight": "model-00002-of-00002.safetensors", + "model.layers.27.mlp.down_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.27.mlp.gate_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.27.mlp.up_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.27.post_attention_layernorm.weight": "model-00002-of-00002.safetensors", + "model.layers.27.self_attn.k_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.27.self_attn.o_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.27.self_attn.q_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.27.self_attn.v_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.3.input_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.3.mlp.down_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.3.mlp.gate_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.3.mlp.up_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.3.post_attention_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.3.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.3.self_attn.o_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.3.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.3.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.4.input_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.4.mlp.down_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.4.mlp.gate_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.4.mlp.up_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.4.post_attention_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.4.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.4.self_attn.o_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.4.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.4.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.5.input_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.5.mlp.down_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.5.mlp.gate_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.5.mlp.up_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.5.post_attention_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.5.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.5.self_attn.o_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.5.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.5.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.6.input_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.6.mlp.down_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.6.mlp.gate_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.6.mlp.up_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.6.post_attention_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.6.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.6.self_attn.o_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.6.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.6.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.7.input_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.7.mlp.down_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.7.mlp.gate_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.7.mlp.up_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.7.post_attention_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.7.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.7.self_attn.o_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.7.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.7.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.8.input_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.8.mlp.down_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.8.mlp.gate_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.8.mlp.up_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.8.post_attention_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.8.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.8.self_attn.o_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.8.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.8.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.9.input_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.9.mlp.down_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.9.mlp.gate_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.9.mlp.up_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.9.post_attention_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.9.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.9.self_attn.o_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.9.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.9.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.norm.weight": "model-00002-of-00002.safetensors" + } +} diff --git a/checkpoint-256/special_tokens_map.json b/checkpoint-256/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..b43be96621d147110fb8a18b5776ec6e38516127 --- /dev/null +++ b/checkpoint-256/special_tokens_map.json @@ -0,0 +1,17 @@ +{ + "bos_token": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": "<|eot_id|>" +} diff --git a/checkpoint-256/tokenizer.json b/checkpoint-256/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..1c1d8d5c9024994f1d3b00f9662b8dd89ca13cf2 --- /dev/null +++ b/checkpoint-256/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6b9e4e7fb171f92fd137b777cc2714bf87d11576700a1dcd7a399e7bbe39537b +size 17209920 diff --git a/checkpoint-256/tokenizer_config.json b/checkpoint-256/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..e3cdb7bfcc59fd293867d98e124a30b3ddf39c93 --- /dev/null +++ b/checkpoint-256/tokenizer_config.json @@ -0,0 +1,2063 @@ +{ + "added_tokens_decoder": { + "128000": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128001": { + "content": "<|end_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128002": { + "content": "<|reserved_special_token_0|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128003": { + "content": "<|reserved_special_token_1|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128004": { + "content": "<|finetune_right_pad_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128005": { + "content": "<|reserved_special_token_2|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128006": { + "content": "<|start_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128007": { + "content": "<|end_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128008": { + "content": "<|eom_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128009": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128010": { + "content": "<|python_tag|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128011": { + "content": "<|reserved_special_token_3|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128012": { + "content": "<|reserved_special_token_4|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128013": { + "content": "<|reserved_special_token_5|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128014": { + "content": "<|reserved_special_token_6|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128015": { + "content": "<|reserved_special_token_7|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128016": { + "content": "<|reserved_special_token_8|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128017": { + "content": "<|reserved_special_token_9|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128018": { + "content": "<|reserved_special_token_10|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128019": { + "content": "<|reserved_special_token_11|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128020": { + "content": "<|reserved_special_token_12|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128021": { + "content": "<|reserved_special_token_13|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128022": { + "content": "<|reserved_special_token_14|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128023": { + "content": "<|reserved_special_token_15|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128024": { + "content": "<|reserved_special_token_16|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128025": { + "content": "<|reserved_special_token_17|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128026": { + "content": "<|reserved_special_token_18|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128027": { + "content": "<|reserved_special_token_19|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128028": { + "content": "<|reserved_special_token_20|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128029": { + "content": "<|reserved_special_token_21|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128030": { + "content": "<|reserved_special_token_22|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128031": { + "content": "<|reserved_special_token_23|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128032": { + "content": "<|reserved_special_token_24|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128033": { + "content": "<|reserved_special_token_25|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128034": { + "content": "<|reserved_special_token_26|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128035": { + "content": "<|reserved_special_token_27|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128036": { + "content": "<|reserved_special_token_28|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128037": { + "content": "<|reserved_special_token_29|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128038": { + "content": "<|reserved_special_token_30|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128039": { + "content": "<|reserved_special_token_31|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128040": { + "content": "<|reserved_special_token_32|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128041": { + "content": "<|reserved_special_token_33|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128042": { + "content": "<|reserved_special_token_34|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128043": { + "content": "<|reserved_special_token_35|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128044": { + "content": "<|reserved_special_token_36|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128045": { + "content": "<|reserved_special_token_37|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128046": { + "content": "<|reserved_special_token_38|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128047": { + "content": "<|reserved_special_token_39|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128048": { + "content": "<|reserved_special_token_40|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128049": { + "content": "<|reserved_special_token_41|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128050": { + "content": "<|reserved_special_token_42|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128051": { + "content": "<|reserved_special_token_43|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128052": { + "content": "<|reserved_special_token_44|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128053": { + "content": "<|reserved_special_token_45|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128054": { + "content": "<|reserved_special_token_46|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128055": { + "content": "<|reserved_special_token_47|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128056": { + "content": "<|reserved_special_token_48|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128057": { + "content": "<|reserved_special_token_49|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128058": { + "content": "<|reserved_special_token_50|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128059": { + "content": "<|reserved_special_token_51|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128060": { + "content": "<|reserved_special_token_52|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128061": { + "content": "<|reserved_special_token_53|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128062": { + "content": "<|reserved_special_token_54|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128063": { + "content": "<|reserved_special_token_55|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128064": { + "content": "<|reserved_special_token_56|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128065": { + "content": "<|reserved_special_token_57|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128066": { + "content": "<|reserved_special_token_58|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128067": { + "content": "<|reserved_special_token_59|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128068": { + "content": "<|reserved_special_token_60|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128069": { + "content": "<|reserved_special_token_61|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128070": { + "content": "<|reserved_special_token_62|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128071": { + "content": "<|reserved_special_token_63|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128072": { + "content": "<|reserved_special_token_64|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128073": { + "content": "<|reserved_special_token_65|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128074": { + "content": "<|reserved_special_token_66|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128075": { + "content": "<|reserved_special_token_67|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128076": { + "content": "<|reserved_special_token_68|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128077": { + "content": "<|reserved_special_token_69|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128078": { + "content": "<|reserved_special_token_70|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128079": { + "content": "<|reserved_special_token_71|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128080": { + "content": "<|reserved_special_token_72|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128081": { + "content": "<|reserved_special_token_73|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128082": { + "content": "<|reserved_special_token_74|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128083": { + "content": "<|reserved_special_token_75|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128084": { + "content": "<|reserved_special_token_76|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128085": { + "content": "<|reserved_special_token_77|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128086": { + "content": "<|reserved_special_token_78|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128087": { + "content": "<|reserved_special_token_79|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128088": { + "content": "<|reserved_special_token_80|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128089": { + "content": "<|reserved_special_token_81|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128090": { + "content": "<|reserved_special_token_82|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128091": { + "content": "<|reserved_special_token_83|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128092": { + "content": "<|reserved_special_token_84|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128093": { + "content": "<|reserved_special_token_85|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128094": { + "content": "<|reserved_special_token_86|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128095": { + "content": "<|reserved_special_token_87|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128096": { + "content": "<|reserved_special_token_88|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128097": { + "content": "<|reserved_special_token_89|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128098": { + "content": "<|reserved_special_token_90|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128099": { + "content": "<|reserved_special_token_91|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128100": { + "content": "<|reserved_special_token_92|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128101": { + "content": "<|reserved_special_token_93|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128102": { + "content": "<|reserved_special_token_94|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128103": { + "content": "<|reserved_special_token_95|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128104": { + "content": "<|reserved_special_token_96|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128105": { + "content": "<|reserved_special_token_97|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128106": { + "content": "<|reserved_special_token_98|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128107": { + "content": "<|reserved_special_token_99|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128108": { + "content": "<|reserved_special_token_100|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128109": { + "content": "<|reserved_special_token_101|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128110": { + "content": "<|reserved_special_token_102|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128111": { + "content": "<|reserved_special_token_103|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128112": { + "content": "<|reserved_special_token_104|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128113": { + "content": "<|reserved_special_token_105|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128114": { + "content": "<|reserved_special_token_106|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128115": { + "content": "<|reserved_special_token_107|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128116": { + "content": "<|reserved_special_token_108|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128117": { + "content": "<|reserved_special_token_109|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128118": { + "content": "<|reserved_special_token_110|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128119": { + "content": "<|reserved_special_token_111|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128120": { + "content": "<|reserved_special_token_112|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128121": { + "content": "<|reserved_special_token_113|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128122": { + "content": "<|reserved_special_token_114|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128123": { + "content": "<|reserved_special_token_115|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128124": { + "content": "<|reserved_special_token_116|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128125": { + "content": "<|reserved_special_token_117|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128126": { + "content": "<|reserved_special_token_118|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128127": { + "content": "<|reserved_special_token_119|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128128": { + "content": "<|reserved_special_token_120|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128129": { + "content": "<|reserved_special_token_121|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128130": { + "content": "<|reserved_special_token_122|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128131": { + "content": "<|reserved_special_token_123|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128132": { + "content": "<|reserved_special_token_124|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128133": { + "content": "<|reserved_special_token_125|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128134": { + "content": "<|reserved_special_token_126|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128135": { + "content": "<|reserved_special_token_127|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128136": { + "content": "<|reserved_special_token_128|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128137": { + "content": "<|reserved_special_token_129|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128138": { + "content": "<|reserved_special_token_130|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128139": { + "content": "<|reserved_special_token_131|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128140": { + "content": "<|reserved_special_token_132|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128141": { + "content": "<|reserved_special_token_133|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128142": { + "content": "<|reserved_special_token_134|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128143": { + "content": "<|reserved_special_token_135|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128144": { + "content": "<|reserved_special_token_136|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128145": { + "content": "<|reserved_special_token_137|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128146": { + "content": "<|reserved_special_token_138|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128147": { + "content": "<|reserved_special_token_139|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128148": { + "content": "<|reserved_special_token_140|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128149": { + "content": "<|reserved_special_token_141|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128150": { + "content": "<|reserved_special_token_142|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128151": { + "content": "<|reserved_special_token_143|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128152": { + "content": "<|reserved_special_token_144|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128153": { + "content": "<|reserved_special_token_145|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128154": { + "content": "<|reserved_special_token_146|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128155": { + "content": "<|reserved_special_token_147|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128156": { + "content": "<|reserved_special_token_148|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128157": { + "content": "<|reserved_special_token_149|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128158": { + "content": "<|reserved_special_token_150|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128159": { + "content": "<|reserved_special_token_151|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128160": { + "content": "<|reserved_special_token_152|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128161": { + "content": "<|reserved_special_token_153|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128162": { + "content": "<|reserved_special_token_154|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128163": { + "content": "<|reserved_special_token_155|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128164": { + "content": "<|reserved_special_token_156|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128165": { + "content": "<|reserved_special_token_157|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128166": { + "content": "<|reserved_special_token_158|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128167": { + "content": "<|reserved_special_token_159|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128168": { + "content": "<|reserved_special_token_160|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128169": { + "content": "<|reserved_special_token_161|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128170": { + "content": "<|reserved_special_token_162|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128171": { + "content": "<|reserved_special_token_163|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128172": { + "content": "<|reserved_special_token_164|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128173": { + "content": "<|reserved_special_token_165|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128174": { + "content": "<|reserved_special_token_166|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128175": { + "content": "<|reserved_special_token_167|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128176": { + "content": "<|reserved_special_token_168|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128177": { + "content": "<|reserved_special_token_169|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128178": { + "content": "<|reserved_special_token_170|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128179": { + "content": "<|reserved_special_token_171|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128180": { + "content": "<|reserved_special_token_172|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128181": { + "content": "<|reserved_special_token_173|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128182": { + "content": "<|reserved_special_token_174|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128183": { + "content": "<|reserved_special_token_175|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128184": { + "content": "<|reserved_special_token_176|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128185": { + "content": "<|reserved_special_token_177|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128186": { + "content": "<|reserved_special_token_178|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128187": { + "content": "<|reserved_special_token_179|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128188": { + "content": "<|reserved_special_token_180|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128189": { + "content": "<|reserved_special_token_181|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128190": { + "content": "<|reserved_special_token_182|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128191": { + "content": "<|reserved_special_token_183|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128192": { + "content": "<|reserved_special_token_184|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128193": { + "content": "<|reserved_special_token_185|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128194": { + "content": "<|reserved_special_token_186|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128195": { + "content": "<|reserved_special_token_187|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128196": { + "content": "<|reserved_special_token_188|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128197": { + "content": "<|reserved_special_token_189|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128198": { + "content": "<|reserved_special_token_190|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128199": { + "content": "<|reserved_special_token_191|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128200": { + "content": "<|reserved_special_token_192|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128201": { + "content": "<|reserved_special_token_193|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128202": { + "content": "<|reserved_special_token_194|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128203": { + "content": "<|reserved_special_token_195|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128204": { + "content": "<|reserved_special_token_196|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128205": { + "content": "<|reserved_special_token_197|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128206": { + "content": "<|reserved_special_token_198|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128207": { + "content": "<|reserved_special_token_199|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128208": { + "content": "<|reserved_special_token_200|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128209": { + "content": "<|reserved_special_token_201|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128210": { + "content": "<|reserved_special_token_202|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128211": { + "content": "<|reserved_special_token_203|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128212": { + "content": "<|reserved_special_token_204|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128213": { + "content": "<|reserved_special_token_205|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128214": { + "content": "<|reserved_special_token_206|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128215": { + "content": "<|reserved_special_token_207|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128216": { + "content": "<|reserved_special_token_208|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128217": { + "content": "<|reserved_special_token_209|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128218": { + "content": "<|reserved_special_token_210|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128219": { + "content": "<|reserved_special_token_211|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128220": { + "content": "<|reserved_special_token_212|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128221": { + "content": "<|reserved_special_token_213|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128222": { + "content": "<|reserved_special_token_214|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128223": { + "content": "<|reserved_special_token_215|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128224": { + "content": "<|reserved_special_token_216|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128225": { + "content": "<|reserved_special_token_217|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128226": { + "content": "<|reserved_special_token_218|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128227": { + "content": "<|reserved_special_token_219|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128228": { + "content": "<|reserved_special_token_220|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128229": { + "content": "<|reserved_special_token_221|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128230": { + "content": "<|reserved_special_token_222|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128231": { + "content": "<|reserved_special_token_223|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128232": { + "content": "<|reserved_special_token_224|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128233": { + "content": "<|reserved_special_token_225|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128234": { + "content": "<|reserved_special_token_226|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128235": { + "content": "<|reserved_special_token_227|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128236": { + "content": "<|reserved_special_token_228|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128237": { + "content": "<|reserved_special_token_229|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128238": { + "content": "<|reserved_special_token_230|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128239": { + "content": "<|reserved_special_token_231|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128240": { + "content": "<|reserved_special_token_232|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128241": { + "content": "<|reserved_special_token_233|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128242": { + "content": "<|reserved_special_token_234|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128243": { + "content": "<|reserved_special_token_235|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128244": { + "content": "<|reserved_special_token_236|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128245": { + "content": "<|reserved_special_token_237|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128246": { + "content": "<|reserved_special_token_238|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128247": { + "content": "<|reserved_special_token_239|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128248": { + "content": "<|reserved_special_token_240|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128249": { + "content": "<|reserved_special_token_241|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128250": { + "content": "<|reserved_special_token_242|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128251": { + "content": "<|reserved_special_token_243|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128252": { + "content": "<|reserved_special_token_244|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128253": { + "content": "<|reserved_special_token_245|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128254": { + "content": "<|reserved_special_token_246|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128255": { + "content": "<|reserved_special_token_247|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + } + }, + "bos_token": "<|begin_of_text|>", + "chat_template": "{{- bos_token }}\n{%- if custom_tools is defined %}\n {%- set tools = custom_tools %}\n{%- endif %}\n{%- if not tools_in_user_message is defined %}\n {%- set tools_in_user_message = true %}\n{%- endif %}\n{%- if not date_string is defined %}\n {%- if strftime_now is defined %}\n {%- set date_string = strftime_now(\"%d %b %Y\") %}\n {%- else %}\n {%- set date_string = \"26 Jul 2024\" %}\n {%- endif %}\n{%- endif %}\n{%- if not tools is defined %}\n {%- set tools = none %}\n{%- endif %}\n\n{#- This block extracts the system message, so we can slot it into the right place. #}\n{%- if messages[0]['role'] == 'system' %}\n {%- set system_message = messages[0]['content']|trim %}\n {%- set messages = messages[1:] %}\n{%- else %}\n {%- set system_message = \"\" %}\n{%- endif %}\n\n{#- System message #}\n{{- \"<|start_header_id|>system<|end_header_id|>\\n\\n\" }}\n{%- if tools is not none %}\n {{- \"Environment: ipython\\n\" }}\n{%- endif %}\n{{- \"Cutting Knowledge Date: December 2023\\n\" }}\n{{- \"Today Date: \" + date_string + \"\\n\\n\" }}\n{%- if tools is not none and not tools_in_user_message %}\n {{- \"You have access to the following functions. To call a function, please respond with JSON for a function call.\" }}\n {{- 'Respond in the format {\"name\": function name, \"parameters\": dictionary of argument name and its value}.' }}\n {{- \"Do not use variables.\\n\\n\" }}\n {%- for t in tools %}\n {{- t | tojson(indent=4) }}\n {{- \"\\n\\n\" }}\n {%- endfor %}\n{%- endif %}\n{{- system_message }}\n{{- \"<|eot_id|>\" }}\n\n{#- Custom tools are passed in a user message with some extra guidance #}\n{%- if tools_in_user_message and not tools is none %}\n {#- Extract the first user message so we can plug it in here #}\n {%- if messages | length != 0 %}\n {%- set first_user_message = messages[0]['content']|trim %}\n {%- set messages = messages[1:] %}\n {%- else %}\n {{- raise_exception(\"Cannot put tools in the first user message when there's no first user message!\") }}\n{%- endif %}\n {{- '<|start_header_id|>user<|end_header_id|>\\n\\n' -}}\n {{- \"Given the following functions, please respond with a JSON for a function call \" }}\n {{- \"with its proper arguments that best answers the given prompt.\\n\\n\" }}\n {{- 'Respond in the format {\"name\": function name, \"parameters\": dictionary of argument name and its value}.' }}\n {{- \"Do not use variables.\\n\\n\" }}\n {%- for t in tools %}\n {{- t | tojson(indent=4) }}\n {{- \"\\n\\n\" }}\n {%- endfor %}\n {{- first_user_message + \"<|eot_id|>\"}}\n{%- endif %}\n\n{%- for message in messages %}\n {%- if not (message.role == 'ipython' or message.role == 'tool' or 'tool_calls' in message) %}\n {{- '<|start_header_id|>' + message['role'] + '<|end_header_id|>\\n\\n'+ message['content'] | trim + '<|eot_id|>' }}\n {%- elif 'tool_calls' in message %}\n {%- if not message.tool_calls|length == 1 %}\n {{- raise_exception(\"This model only supports single tool-calls at once!\") }}\n {%- endif %}\n {%- set tool_call = message.tool_calls[0].function %}\n {{- '<|start_header_id|>assistant<|end_header_id|>\\n\\n' -}}\n {{- '{\"name\": \"' + tool_call.name + '\", ' }}\n {{- '\"parameters\": ' }}\n {{- tool_call.arguments | tojson }}\n {{- \"}\" }}\n {{- \"<|eot_id|>\" }}\n {%- elif message.role == \"tool\" or message.role == \"ipython\" %}\n {{- \"<|start_header_id|>ipython<|end_header_id|>\\n\\n\" }}\n {%- if message.content is mapping or message.content is iterable %}\n {{- message.content | tojson }}\n {%- else %}\n {{- message.content }}\n {%- endif %}\n {{- \"<|eot_id|>\" }}\n {%- endif %}\n{%- endfor %}\n{%- if add_generation_prompt %}\n {{- '<|start_header_id|>assistant<|end_header_id|>\\n\\n' }}\n{%- endif %}\n", + "clean_up_tokenization_spaces": true, + "eos_token": "<|eot_id|>", + "model_input_names": [ + "input_ids", + "attention_mask" + ], + "model_max_length": 2048, + "pad_token": "<|eot_id|>", + "tokenizer_class": "PreTrainedTokenizerFast" +} diff --git a/checkpoint-256/trainer_state.json b/checkpoint-256/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..8dbad7a11b3f814c9993f562e2c6e8935e1a5171 --- /dev/null +++ b/checkpoint-256/trainer_state.json @@ -0,0 +1,3873 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 0.5412619755533532, + "eval_steps": 500, + "global_step": 256, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.002114304592005286, + "grad_norm": 1.5018059015274048, + "learning_rate": 2.083333333333333e-08, + "logits/chosen": -0.3466828167438507, + "logits/rejected": -0.30099987983703613, + "logps/chosen": -0.9345186948776245, + "logps/rejected": -0.9117153882980347, + "loss": 1.4889, + "rewards/accuracies": 0.53125, + "rewards/chosen": -1.869037389755249, + "rewards/margins": -0.04560665041208267, + "rewards/rejected": -1.8234307765960693, + "step": 1 + }, + { + "epoch": 0.004228609184010572, + "grad_norm": 0.8093975186347961, + "learning_rate": 4.166666666666666e-08, + "logits/chosen": -0.4310421049594879, + "logits/rejected": -0.39132067561149597, + "logps/chosen": -0.8198825716972351, + "logps/rejected": -0.8644211888313293, + "loss": 1.376, + "rewards/accuracies": 0.515625, + "rewards/chosen": -1.6397651433944702, + "rewards/margins": 0.08907715976238251, + "rewards/rejected": -1.7288423776626587, + "step": 2 + }, + { + "epoch": 0.006342913776015857, + "grad_norm": 0.5377389788627625, + "learning_rate": 6.25e-08, + "logits/chosen": -0.46692028641700745, + "logits/rejected": -0.4649256467819214, + "logps/chosen": -0.9087910652160645, + "logps/rejected": -0.9648240804672241, + "loss": 1.3404, + "rewards/accuracies": 0.5703125, + "rewards/chosen": -1.817582130432129, + "rewards/margins": 0.11206617206335068, + "rewards/rejected": -1.9296481609344482, + "step": 3 + }, + { + "epoch": 0.008457218368021144, + "grad_norm": 0.3221875727176666, + "learning_rate": 8.333333333333333e-08, + "logits/chosen": -0.416828453540802, + "logits/rejected": -0.3584724962711334, + "logps/chosen": -0.7818898558616638, + "logps/rejected": -0.8170815110206604, + "loss": 1.3806, + "rewards/accuracies": 0.484375, + "rewards/chosen": -1.5637797117233276, + "rewards/margins": 0.07038339227437973, + "rewards/rejected": -1.6341630220413208, + "step": 4 + }, + { + "epoch": 0.010571522960026428, + "grad_norm": 0.64655601978302, + "learning_rate": 1.0416666666666667e-07, + "logits/chosen": -0.376886248588562, + "logits/rejected": -0.3516141474246979, + "logps/chosen": -0.8814125061035156, + "logps/rejected": -1.0214396715164185, + "loss": 1.2741, + "rewards/accuracies": 0.625, + "rewards/chosen": -1.7628250122070312, + "rewards/margins": 0.28005433082580566, + "rewards/rejected": -2.042879343032837, + "step": 5 + }, + { + "epoch": 0.012685827552031714, + "grad_norm": 0.4775894582271576, + "learning_rate": 1.25e-07, + "logits/chosen": -0.4757865369319916, + "logits/rejected": -0.4498941898345947, + "logps/chosen": -0.8962199687957764, + "logps/rejected": -0.9462199807167053, + "loss": 1.364, + "rewards/accuracies": 0.5625, + "rewards/chosen": -1.7924399375915527, + "rewards/margins": 0.10000008344650269, + "rewards/rejected": -1.8924399614334106, + "step": 6 + }, + { + "epoch": 0.014800132144037, + "grad_norm": 1.2459568977355957, + "learning_rate": 1.4583333333333335e-07, + "logits/chosen": -0.38895344734191895, + "logits/rejected": -0.38165366649627686, + "logps/chosen": -0.9025766253471375, + "logps/rejected": -0.9465017318725586, + "loss": 1.3898, + "rewards/accuracies": 0.59375, + "rewards/chosen": -1.805153250694275, + "rewards/margins": 0.0878501906991005, + "rewards/rejected": -1.8930034637451172, + "step": 7 + }, + { + "epoch": 0.016914436736042288, + "grad_norm": 0.6195729374885559, + "learning_rate": 1.6666666666666665e-07, + "logits/chosen": -0.3964853286743164, + "logits/rejected": -0.377862811088562, + "logps/chosen": -0.9054160118103027, + "logps/rejected": -0.9605879187583923, + "loss": 1.3821, + "rewards/accuracies": 0.4765625, + "rewards/chosen": -1.8108320236206055, + "rewards/margins": 0.1103438138961792, + "rewards/rejected": -1.9211758375167847, + "step": 8 + }, + { + "epoch": 0.019028741328047574, + "grad_norm": 1.2074137926101685, + "learning_rate": 1.875e-07, + "logits/chosen": -0.3729037344455719, + "logits/rejected": -0.38143450021743774, + "logps/chosen": -0.9328653216362, + "logps/rejected": -0.9905799627304077, + "loss": 1.3754, + "rewards/accuracies": 0.5078125, + "rewards/chosen": -1.8657306432724, + "rewards/margins": 0.11542946100234985, + "rewards/rejected": -1.9811599254608154, + "step": 9 + }, + { + "epoch": 0.021143045920052856, + "grad_norm": 0.2867220640182495, + "learning_rate": 2.0833333333333333e-07, + "logits/chosen": -0.4263336658477783, + "logits/rejected": -0.42903271317481995, + "logps/chosen": -0.8979260325431824, + "logps/rejected": -0.9078099727630615, + "loss": 1.4438, + "rewards/accuracies": 0.515625, + "rewards/chosen": -1.7958520650863647, + "rewards/margins": 0.019767940044403076, + "rewards/rejected": -1.815619945526123, + "step": 10 + }, + { + "epoch": 0.023257350512058142, + "grad_norm": 0.8363026976585388, + "learning_rate": 2.2916666666666663e-07, + "logits/chosen": -0.3374914526939392, + "logits/rejected": -0.32399696111679077, + "logps/chosen": -0.8886098861694336, + "logps/rejected": -0.9484556317329407, + "loss": 1.3422, + "rewards/accuracies": 0.546875, + "rewards/chosen": -1.7772197723388672, + "rewards/margins": 0.11969132721424103, + "rewards/rejected": -1.8969112634658813, + "step": 11 + }, + { + "epoch": 0.025371655104063428, + "grad_norm": 0.5406804084777832, + "learning_rate": 2.5e-07, + "logits/chosen": -0.42844679951667786, + "logits/rejected": -0.37984615564346313, + "logps/chosen": -0.861629843711853, + "logps/rejected": -0.8968492150306702, + "loss": 1.3922, + "rewards/accuracies": 0.5, + "rewards/chosen": -1.723259687423706, + "rewards/margins": 0.07043875753879547, + "rewards/rejected": -1.7936984300613403, + "step": 12 + }, + { + "epoch": 0.027485959696068714, + "grad_norm": 0.9919329285621643, + "learning_rate": 2.708333333333333e-07, + "logits/chosen": -0.36495402455329895, + "logits/rejected": -0.3249490261077881, + "logps/chosen": -0.8502095937728882, + "logps/rejected": -0.8470643758773804, + "loss": 1.4334, + "rewards/accuracies": 0.5078125, + "rewards/chosen": -1.7004191875457764, + "rewards/margins": -0.006290358491241932, + "rewards/rejected": -1.6941287517547607, + "step": 13 + }, + { + "epoch": 0.029600264288074, + "grad_norm": 0.5477162003517151, + "learning_rate": 2.916666666666667e-07, + "logits/chosen": -0.4155704081058502, + "logits/rejected": -0.39535820484161377, + "logps/chosen": -1.0430240631103516, + "logps/rejected": -1.1318373680114746, + "loss": 1.3533, + "rewards/accuracies": 0.6015625, + "rewards/chosen": -2.086048126220703, + "rewards/margins": 0.17762640118598938, + "rewards/rejected": -2.263674736022949, + "step": 14 + }, + { + "epoch": 0.031714568880079286, + "grad_norm": 0.26530712842941284, + "learning_rate": 3.1249999999999997e-07, + "logits/chosen": -0.4810572564601898, + "logits/rejected": -0.42454615235328674, + "logps/chosen": -0.8741041421890259, + "logps/rejected": -0.9494178295135498, + "loss": 1.3655, + "rewards/accuracies": 0.5, + "rewards/chosen": -1.7482082843780518, + "rewards/margins": 0.15062758326530457, + "rewards/rejected": -1.8988356590270996, + "step": 15 + }, + { + "epoch": 0.033828873472084575, + "grad_norm": 0.9272629618644714, + "learning_rate": 3.333333333333333e-07, + "logits/chosen": -0.4440098702907562, + "logits/rejected": -0.3930297791957855, + "logps/chosen": -0.8473359942436218, + "logps/rejected": -0.9369213581085205, + "loss": 1.3248, + "rewards/accuracies": 0.5625, + "rewards/chosen": -1.6946719884872437, + "rewards/margins": 0.17917080223560333, + "rewards/rejected": -1.873842716217041, + "step": 16 + }, + { + "epoch": 0.03594317806408986, + "grad_norm": 0.5912418961524963, + "learning_rate": 3.541666666666667e-07, + "logits/chosen": -0.3838099539279938, + "logits/rejected": -0.3507584035396576, + "logps/chosen": -0.8888350129127502, + "logps/rejected": -0.9361770749092102, + "loss": 1.383, + "rewards/accuracies": 0.4921875, + "rewards/chosen": -1.7776700258255005, + "rewards/margins": 0.0946839451789856, + "rewards/rejected": -1.8723541498184204, + "step": 17 + }, + { + "epoch": 0.03805748265609515, + "grad_norm": 0.6536504030227661, + "learning_rate": 3.75e-07, + "logits/chosen": -0.3581697940826416, + "logits/rejected": -0.3620460629463196, + "logps/chosen": -0.8519617319107056, + "logps/rejected": -0.9022184610366821, + "loss": 1.3841, + "rewards/accuracies": 0.5078125, + "rewards/chosen": -1.7039234638214111, + "rewards/margins": 0.10051343590021133, + "rewards/rejected": -1.8044369220733643, + "step": 18 + }, + { + "epoch": 0.04017178724810043, + "grad_norm": 0.3433632552623749, + "learning_rate": 3.958333333333333e-07, + "logits/chosen": -0.37887442111968994, + "logits/rejected": -0.37543320655822754, + "logps/chosen": -0.9464104175567627, + "logps/rejected": -1.0017329454421997, + "loss": 1.3649, + "rewards/accuracies": 0.5703125, + "rewards/chosen": -1.8928208351135254, + "rewards/margins": 0.11064518243074417, + "rewards/rejected": -2.0034658908843994, + "step": 19 + }, + { + "epoch": 0.04228609184010571, + "grad_norm": 0.9764007329940796, + "learning_rate": 4.1666666666666667e-07, + "logits/chosen": -0.44110679626464844, + "logits/rejected": -0.4280649721622467, + "logps/chosen": -0.9046768546104431, + "logps/rejected": -1.0464633703231812, + "loss": 1.2592, + "rewards/accuracies": 0.625, + "rewards/chosen": -1.8093537092208862, + "rewards/margins": 0.2835729420185089, + "rewards/rejected": -2.0929267406463623, + "step": 20 + }, + { + "epoch": 0.044400396432111, + "grad_norm": 1.8563830852508545, + "learning_rate": 4.375e-07, + "logits/chosen": -0.45183491706848145, + "logits/rejected": -0.42935287952423096, + "logps/chosen": -0.9043138027191162, + "logps/rejected": -0.9462392926216125, + "loss": 1.3784, + "rewards/accuracies": 0.5625, + "rewards/chosen": -1.8086276054382324, + "rewards/margins": 0.08385094255208969, + "rewards/rejected": -1.892478585243225, + "step": 21 + }, + { + "epoch": 0.046514701024116284, + "grad_norm": 1.3473299741744995, + "learning_rate": 4.5833333333333327e-07, + "logits/chosen": -0.37855517864227295, + "logits/rejected": -0.34429043531417847, + "logps/chosen": -0.9284683465957642, + "logps/rejected": -0.9454050064086914, + "loss": 1.4346, + "rewards/accuracies": 0.5234375, + "rewards/chosen": -1.8569366931915283, + "rewards/margins": 0.03387312963604927, + "rewards/rejected": -1.8908100128173828, + "step": 22 + }, + { + "epoch": 0.04862900561612157, + "grad_norm": 0.940831184387207, + "learning_rate": 4.791666666666667e-07, + "logits/chosen": -0.39172160625457764, + "logits/rejected": -0.3695780634880066, + "logps/chosen": -0.9314202666282654, + "logps/rejected": -1.020229697227478, + "loss": 1.3322, + "rewards/accuracies": 0.5703125, + "rewards/chosen": -1.8628405332565308, + "rewards/margins": 0.17761869728565216, + "rewards/rejected": -2.040459394454956, + "step": 23 + }, + { + "epoch": 0.050743310208126856, + "grad_norm": 0.5783158540725708, + "learning_rate": 5e-07, + "logits/chosen": -0.4958629608154297, + "logits/rejected": -0.4257377088069916, + "logps/chosen": -0.9379237294197083, + "logps/rejected": -0.9415461421012878, + "loss": 1.441, + "rewards/accuracies": 0.515625, + "rewards/chosen": -1.8758474588394165, + "rewards/margins": 0.0072449808940291405, + "rewards/rejected": -1.8830922842025757, + "step": 24 + }, + { + "epoch": 0.052857614800132145, + "grad_norm": 1.4209853410720825, + "learning_rate": 5.208333333333334e-07, + "logits/chosen": -0.36407172679901123, + "logits/rejected": -0.3331725299358368, + "logps/chosen": -0.9192589521408081, + "logps/rejected": -0.9595308899879456, + "loss": 1.3994, + "rewards/accuracies": 0.5078125, + "rewards/chosen": -1.8385179042816162, + "rewards/margins": 0.080544114112854, + "rewards/rejected": -1.9190617799758911, + "step": 25 + }, + { + "epoch": 0.05497191939213743, + "grad_norm": 0.6310216188430786, + "learning_rate": 5.416666666666666e-07, + "logits/chosen": -0.41772690415382385, + "logits/rejected": -0.36565953493118286, + "logps/chosen": -0.8052878379821777, + "logps/rejected": -0.8673746585845947, + "loss": 1.3356, + "rewards/accuracies": 0.5859375, + "rewards/chosen": -1.6105756759643555, + "rewards/margins": 0.12417369335889816, + "rewards/rejected": -1.7347493171691895, + "step": 26 + }, + { + "epoch": 0.05708622398414272, + "grad_norm": 1.2933462858200073, + "learning_rate": 5.625e-07, + "logits/chosen": -0.4482795000076294, + "logits/rejected": -0.39409321546554565, + "logps/chosen": -0.8339261412620544, + "logps/rejected": -0.8675202131271362, + "loss": 1.3739, + "rewards/accuracies": 0.4921875, + "rewards/chosen": -1.6678522825241089, + "rewards/margins": 0.06718815863132477, + "rewards/rejected": -1.7350404262542725, + "step": 27 + }, + { + "epoch": 0.059200528576148, + "grad_norm": 0.5808025002479553, + "learning_rate": 5.833333333333334e-07, + "logits/chosen": -0.37116044759750366, + "logits/rejected": -0.3478051722049713, + "logps/chosen": -0.8950318694114685, + "logps/rejected": -0.9756672978401184, + "loss": 1.3505, + "rewards/accuracies": 0.5625, + "rewards/chosen": -1.790063738822937, + "rewards/margins": 0.1612708568572998, + "rewards/rejected": -1.9513345956802368, + "step": 28 + }, + { + "epoch": 0.06131483316815329, + "grad_norm": 1.0569533109664917, + "learning_rate": 6.041666666666666e-07, + "logits/chosen": -0.421148419380188, + "logits/rejected": -0.38443076610565186, + "logps/chosen": -0.8021283745765686, + "logps/rejected": -0.8370179533958435, + "loss": 1.3916, + "rewards/accuracies": 0.46875, + "rewards/chosen": -1.6042567491531372, + "rewards/margins": 0.06977920234203339, + "rewards/rejected": -1.674035906791687, + "step": 29 + }, + { + "epoch": 0.06342913776015857, + "grad_norm": 0.42577147483825684, + "learning_rate": 6.249999999999999e-07, + "logits/chosen": -0.4429818391799927, + "logits/rejected": -0.3524704575538635, + "logps/chosen": -0.8916822671890259, + "logps/rejected": -0.8985542058944702, + "loss": 1.4321, + "rewards/accuracies": 0.5234375, + "rewards/chosen": -1.7833645343780518, + "rewards/margins": 0.01374388113617897, + "rewards/rejected": -1.7971084117889404, + "step": 30 + }, + { + "epoch": 0.06554344235216386, + "grad_norm": 1.0056904554367065, + "learning_rate": 6.458333333333333e-07, + "logits/chosen": -0.376451700925827, + "logits/rejected": -0.342519074678421, + "logps/chosen": -0.9038617014884949, + "logps/rejected": -0.953092634677887, + "loss": 1.398, + "rewards/accuracies": 0.4765625, + "rewards/chosen": -1.8077234029769897, + "rewards/margins": 0.09846188127994537, + "rewards/rejected": -1.906185269355774, + "step": 31 + }, + { + "epoch": 0.06765774694416915, + "grad_norm": 0.5494012236595154, + "learning_rate": 6.666666666666666e-07, + "logits/chosen": -0.3459138870239258, + "logits/rejected": -0.3590989410877228, + "logps/chosen": -0.8274999260902405, + "logps/rejected": -0.8776509761810303, + "loss": 1.363, + "rewards/accuracies": 0.5625, + "rewards/chosen": -1.654999852180481, + "rewards/margins": 0.1003020703792572, + "rewards/rejected": -1.7553019523620605, + "step": 32 + }, + { + "epoch": 0.06977205153617443, + "grad_norm": 0.693267822265625, + "learning_rate": 6.875e-07, + "logits/chosen": -0.40053680539131165, + "logits/rejected": -0.37323904037475586, + "logps/chosen": -0.8255244493484497, + "logps/rejected": -0.8658804893493652, + "loss": 1.3712, + "rewards/accuracies": 0.578125, + "rewards/chosen": -1.6510488986968994, + "rewards/margins": 0.08071210980415344, + "rewards/rejected": -1.7317609786987305, + "step": 33 + }, + { + "epoch": 0.07188635612817972, + "grad_norm": 2.213238000869751, + "learning_rate": 7.083333333333334e-07, + "logits/chosen": -0.40097948908805847, + "logits/rejected": -0.38190510869026184, + "logps/chosen": -0.9122671484947205, + "logps/rejected": -0.9549552798271179, + "loss": 1.36, + "rewards/accuracies": 0.5390625, + "rewards/chosen": -1.824534296989441, + "rewards/margins": 0.0853763073682785, + "rewards/rejected": -1.9099105596542358, + "step": 34 + }, + { + "epoch": 0.074000660720185, + "grad_norm": 0.6859830021858215, + "learning_rate": 7.291666666666666e-07, + "logits/chosen": -0.42501094937324524, + "logits/rejected": -0.42549416422843933, + "logps/chosen": -1.0008373260498047, + "logps/rejected": -1.1157118082046509, + "loss": 1.3294, + "rewards/accuracies": 0.5546875, + "rewards/chosen": -2.0016746520996094, + "rewards/margins": 0.22974897921085358, + "rewards/rejected": -2.2314236164093018, + "step": 35 + }, + { + "epoch": 0.0761149653121903, + "grad_norm": 0.6468721628189087, + "learning_rate": 7.5e-07, + "logits/chosen": -0.36494994163513184, + "logits/rejected": -0.30433908104896545, + "logps/chosen": -0.9062094688415527, + "logps/rejected": -0.920263409614563, + "loss": 1.4312, + "rewards/accuracies": 0.4921875, + "rewards/chosen": -1.8124189376831055, + "rewards/margins": 0.02810765616595745, + "rewards/rejected": -1.840526819229126, + "step": 36 + }, + { + "epoch": 0.07822926990419557, + "grad_norm": 0.5085556507110596, + "learning_rate": 7.708333333333333e-07, + "logits/chosen": -0.4677881598472595, + "logits/rejected": -0.456132709980011, + "logps/chosen": -1.0101865530014038, + "logps/rejected": -1.0429682731628418, + "loss": 1.4132, + "rewards/accuracies": 0.5234375, + "rewards/chosen": -2.0203731060028076, + "rewards/margins": 0.06556359678506851, + "rewards/rejected": -2.0859365463256836, + "step": 37 + }, + { + "epoch": 0.08034357449620086, + "grad_norm": 0.23813335597515106, + "learning_rate": 7.916666666666666e-07, + "logits/chosen": -0.3991190791130066, + "logits/rejected": -0.3664044141769409, + "logps/chosen": -0.9578174352645874, + "logps/rejected": -0.9229263067245483, + "loss": 1.4824, + "rewards/accuracies": 0.4609375, + "rewards/chosen": -1.9156348705291748, + "rewards/margins": -0.06978224962949753, + "rewards/rejected": -1.8458526134490967, + "step": 38 + }, + { + "epoch": 0.08245787908820615, + "grad_norm": 0.587037980556488, + "learning_rate": 8.125e-07, + "logits/chosen": -0.37554049491882324, + "logits/rejected": -0.36305734515190125, + "logps/chosen": -0.8503091931343079, + "logps/rejected": -0.864615261554718, + "loss": 1.4086, + "rewards/accuracies": 0.4296875, + "rewards/chosen": -1.7006183862686157, + "rewards/margins": 0.028611989691853523, + "rewards/rejected": -1.729230523109436, + "step": 39 + }, + { + "epoch": 0.08457218368021142, + "grad_norm": 0.4172501862049103, + "learning_rate": 8.333333333333333e-07, + "logits/chosen": -0.4405443072319031, + "logits/rejected": -0.41723060607910156, + "logps/chosen": -0.8502858877182007, + "logps/rejected": -0.9114271402359009, + "loss": 1.3446, + "rewards/accuracies": 0.546875, + "rewards/chosen": -1.7005717754364014, + "rewards/margins": 0.12228240817785263, + "rewards/rejected": -1.8228542804718018, + "step": 40 + }, + { + "epoch": 0.08668648827221671, + "grad_norm": 0.9275372624397278, + "learning_rate": 8.541666666666666e-07, + "logits/chosen": -0.4200601577758789, + "logits/rejected": -0.3478623628616333, + "logps/chosen": -0.892408013343811, + "logps/rejected": -0.9276402592658997, + "loss": 1.3887, + "rewards/accuracies": 0.46875, + "rewards/chosen": -1.784816026687622, + "rewards/margins": 0.07046431303024292, + "rewards/rejected": -1.8552805185317993, + "step": 41 + }, + { + "epoch": 0.088800792864222, + "grad_norm": 0.7317383289337158, + "learning_rate": 8.75e-07, + "logits/chosen": -0.37675267457962036, + "logits/rejected": -0.33540332317352295, + "logps/chosen": -0.7866061925888062, + "logps/rejected": -0.824250340461731, + "loss": 1.3837, + "rewards/accuracies": 0.5078125, + "rewards/chosen": -1.5732123851776123, + "rewards/margins": 0.07528844475746155, + "rewards/rejected": -1.648500680923462, + "step": 42 + }, + { + "epoch": 0.09091509745622729, + "grad_norm": 0.9452736973762512, + "learning_rate": 8.958333333333334e-07, + "logits/chosen": -0.4662383198738098, + "logits/rejected": -0.4447881579399109, + "logps/chosen": -0.9490666389465332, + "logps/rejected": -1.0112388134002686, + "loss": 1.3412, + "rewards/accuracies": 0.5390625, + "rewards/chosen": -1.8981332778930664, + "rewards/margins": 0.12434446066617966, + "rewards/rejected": -2.022477626800537, + "step": 43 + }, + { + "epoch": 0.09302940204823257, + "grad_norm": 0.2848323881626129, + "learning_rate": 9.166666666666665e-07, + "logits/chosen": -0.41404005885124207, + "logits/rejected": -0.3944583535194397, + "logps/chosen": -0.8224930167198181, + "logps/rejected": -0.8416361808776855, + "loss": 1.4027, + "rewards/accuracies": 0.609375, + "rewards/chosen": -1.6449860334396362, + "rewards/margins": 0.038286346942186356, + "rewards/rejected": -1.683272361755371, + "step": 44 + }, + { + "epoch": 0.09514370664023786, + "grad_norm": 0.7165678143501282, + "learning_rate": 9.374999999999999e-07, + "logits/chosen": -0.40475326776504517, + "logits/rejected": -0.3559921383857727, + "logps/chosen": -0.8070214986801147, + "logps/rejected": -0.8993593454360962, + "loss": 1.3148, + "rewards/accuracies": 0.53125, + "rewards/chosen": -1.6140429973602295, + "rewards/margins": 0.18467575311660767, + "rewards/rejected": -1.7987186908721924, + "step": 45 + }, + { + "epoch": 0.09725801123224315, + "grad_norm": 0.4779021739959717, + "learning_rate": 9.583333333333334e-07, + "logits/chosen": -0.4171525835990906, + "logits/rejected": -0.42166149616241455, + "logps/chosen": -0.7872560024261475, + "logps/rejected": -0.8496187925338745, + "loss": 1.3356, + "rewards/accuracies": 0.5546875, + "rewards/chosen": -1.574512004852295, + "rewards/margins": 0.12472567707300186, + "rewards/rejected": -1.699237585067749, + "step": 46 + }, + { + "epoch": 0.09937231582424844, + "grad_norm": 0.7870219349861145, + "learning_rate": 9.791666666666667e-07, + "logits/chosen": -0.3734116554260254, + "logits/rejected": -0.32778748869895935, + "logps/chosen": -0.7842286825180054, + "logps/rejected": -0.8161548972129822, + "loss": 1.3647, + "rewards/accuracies": 0.5078125, + "rewards/chosen": -1.5684573650360107, + "rewards/margins": 0.06385258585214615, + "rewards/rejected": -1.6323097944259644, + "step": 47 + }, + { + "epoch": 0.10148662041625371, + "grad_norm": 0.2597256600856781, + "learning_rate": 1e-06, + "logits/chosen": -0.4355677664279938, + "logits/rejected": -0.38983187079429626, + "logps/chosen": -0.8787693977355957, + "logps/rejected": -0.9383041262626648, + "loss": 1.35, + "rewards/accuracies": 0.5546875, + "rewards/chosen": -1.7575387954711914, + "rewards/margins": 0.11906948685646057, + "rewards/rejected": -1.8766082525253296, + "step": 48 + }, + { + "epoch": 0.103600925008259, + "grad_norm": 0.9942799210548401, + "learning_rate": 9.999862751990697e-07, + "logits/chosen": -0.4244321882724762, + "logits/rejected": -0.4366786777973175, + "logps/chosen": -0.7910157442092896, + "logps/rejected": -0.8630884885787964, + "loss": 1.3166, + "rewards/accuracies": 0.5546875, + "rewards/chosen": -1.582031488418579, + "rewards/margins": 0.14414538443088531, + "rewards/rejected": -1.7261769771575928, + "step": 49 + }, + { + "epoch": 0.10571522960026429, + "grad_norm": 0.5333903431892395, + "learning_rate": 9.999451015497595e-07, + "logits/chosen": -0.389942467212677, + "logits/rejected": -0.36674585938453674, + "logps/chosen": -0.7312074899673462, + "logps/rejected": -0.7289648652076721, + "loss": 1.4225, + "rewards/accuracies": 0.4765625, + "rewards/chosen": -1.4624149799346924, + "rewards/margins": -0.004485193639993668, + "rewards/rejected": -1.4579297304153442, + "step": 50 + }, + { + "epoch": 0.10782953419226958, + "grad_norm": 0.5712242722511292, + "learning_rate": 9.9987648131247e-07, + "logits/chosen": -0.4622853994369507, + "logits/rejected": -0.3728552460670471, + "logps/chosen": -0.8764299750328064, + "logps/rejected": -0.869678795337677, + "loss": 1.4542, + "rewards/accuracies": 0.53125, + "rewards/chosen": -1.7528599500656128, + "rewards/margins": -0.013502337038516998, + "rewards/rejected": -1.739357590675354, + "step": 51 + }, + { + "epoch": 0.10994383878427486, + "grad_norm": 0.2586441934108734, + "learning_rate": 9.99780418254397e-07, + "logits/chosen": -0.37249019742012024, + "logits/rejected": -0.3998304605484009, + "logps/chosen": -0.8435611724853516, + "logps/rejected": -0.9359882473945618, + "loss": 1.3057, + "rewards/accuracies": 0.625, + "rewards/chosen": -1.6871223449707031, + "rewards/margins": 0.18485431373119354, + "rewards/rejected": -1.8719764947891235, + "step": 52 + }, + { + "epoch": 0.11205814337628014, + "grad_norm": 1.0829113721847534, + "learning_rate": 9.996569176493268e-07, + "logits/chosen": -0.47697725892066956, + "logits/rejected": -0.4208195209503174, + "logps/chosen": -0.8014968037605286, + "logps/rejected": -0.8703804612159729, + "loss": 1.3523, + "rewards/accuracies": 0.5703125, + "rewards/chosen": -1.6029936075210571, + "rewards/margins": 0.1377674788236618, + "rewards/rejected": -1.7407609224319458, + "step": 53 + }, + { + "epoch": 0.11417244796828543, + "grad_norm": 0.5523208379745483, + "learning_rate": 9.995059862773438e-07, + "logits/chosen": -0.40533363819122314, + "logits/rejected": -0.36801978945732117, + "logps/chosen": -0.7641825675964355, + "logps/rejected": -0.8168596029281616, + "loss": 1.3692, + "rewards/accuracies": 0.53125, + "rewards/chosen": -1.528365135192871, + "rewards/margins": 0.10535416752099991, + "rewards/rejected": -1.6337192058563232, + "step": 54 + }, + { + "epoch": 0.11628675256029072, + "grad_norm": 0.614101767539978, + "learning_rate": 9.993276324244605e-07, + "logits/chosen": -0.4476906955242157, + "logits/rejected": -0.40396648645401, + "logps/chosen": -0.8706808090209961, + "logps/rejected": -0.9221430420875549, + "loss": 1.3787, + "rewards/accuracies": 0.515625, + "rewards/chosen": -1.7413616180419922, + "rewards/margins": 0.10292442888021469, + "rewards/rejected": -1.8442860841751099, + "step": 55 + }, + { + "epoch": 0.118401057152296, + "grad_norm": 0.3428778052330017, + "learning_rate": 9.991218658821608e-07, + "logits/chosen": -0.31709593534469604, + "logits/rejected": -0.2760937213897705, + "logps/chosen": -0.842248797416687, + "logps/rejected": -0.8068034648895264, + "loss": 1.498, + "rewards/accuracies": 0.4375, + "rewards/chosen": -1.684497594833374, + "rewards/margins": -0.07089066505432129, + "rewards/rejected": -1.6136069297790527, + "step": 56 + }, + { + "epoch": 0.12051536174430129, + "grad_norm": 0.6877723932266235, + "learning_rate": 9.988886979468643e-07, + "logits/chosen": -0.41800016164779663, + "logits/rejected": -0.4011584222316742, + "logps/chosen": -0.7845420837402344, + "logps/rejected": -0.834447979927063, + "loss": 1.3491, + "rewards/accuracies": 0.546875, + "rewards/chosen": -1.5690841674804688, + "rewards/margins": 0.09981165081262589, + "rewards/rejected": -1.668895959854126, + "step": 57 + }, + { + "epoch": 0.12262966633630658, + "grad_norm": 0.9649701714515686, + "learning_rate": 9.98628141419305e-07, + "logits/chosen": -0.4253537058830261, + "logits/rejected": -0.4305458962917328, + "logps/chosen": -0.86476731300354, + "logps/rejected": -0.9080386161804199, + "loss": 1.3639, + "rewards/accuracies": 0.515625, + "rewards/chosen": -1.72953462600708, + "rewards/margins": 0.08654248714447021, + "rewards/rejected": -1.8160772323608398, + "step": 58 + }, + { + "epoch": 0.12474397092831185, + "grad_norm": 1.3779780864715576, + "learning_rate": 9.98340210603829e-07, + "logits/chosen": -0.39970022439956665, + "logits/rejected": -0.441428005695343, + "logps/chosen": -0.8662775158882141, + "logps/rejected": -0.9646260738372803, + "loss": 1.3001, + "rewards/accuracies": 0.6484375, + "rewards/chosen": -1.7325550317764282, + "rewards/margins": 0.19669723510742188, + "rewards/rejected": -1.9292521476745605, + "step": 59 + }, + { + "epoch": 0.12685827552031714, + "grad_norm": 0.5366966724395752, + "learning_rate": 9.980249213076084e-07, + "logits/chosen": -0.37770116329193115, + "logits/rejected": -0.35231757164001465, + "logps/chosen": -0.8165755867958069, + "logps/rejected": -0.8619179129600525, + "loss": 1.3699, + "rewards/accuracies": 0.578125, + "rewards/chosen": -1.6331511735916138, + "rewards/margins": 0.09068439900875092, + "rewards/rejected": -1.723835825920105, + "step": 60 + }, + { + "epoch": 0.12897258011232243, + "grad_norm": 0.36810922622680664, + "learning_rate": 9.976822908397748e-07, + "logits/chosen": -0.4224976897239685, + "logits/rejected": -0.41758257150650024, + "logps/chosen": -0.8445641994476318, + "logps/rejected": -0.9393664598464966, + "loss": 1.3193, + "rewards/accuracies": 0.5390625, + "rewards/chosen": -1.6891283988952637, + "rewards/margins": 0.18960458040237427, + "rewards/rejected": -1.8787329196929932, + "step": 61 + }, + { + "epoch": 0.13108688470432772, + "grad_norm": 0.6838279366493225, + "learning_rate": 9.97312338010468e-07, + "logits/chosen": -0.4168627858161926, + "logits/rejected": -0.36115381121635437, + "logps/chosen": -0.8370552659034729, + "logps/rejected": -0.8352169394493103, + "loss": 1.4284, + "rewards/accuracies": 0.5703125, + "rewards/chosen": -1.6741105318069458, + "rewards/margins": -0.0036766715347766876, + "rewards/rejected": -1.6704338788986206, + "step": 62 + }, + { + "epoch": 0.133201189296333, + "grad_norm": 0.39330533146858215, + "learning_rate": 9.969150831298037e-07, + "logits/chosen": -0.4558233618736267, + "logits/rejected": -0.4025765061378479, + "logps/chosen": -0.826255738735199, + "logps/rejected": -0.894213080406189, + "loss": 1.3485, + "rewards/accuracies": 0.5, + "rewards/chosen": -1.652511477470398, + "rewards/margins": 0.13591471314430237, + "rewards/rejected": -1.788426160812378, + "step": 63 + }, + { + "epoch": 0.1353154938883383, + "grad_norm": 0.6055929660797119, + "learning_rate": 9.964905480067584e-07, + "logits/chosen": -0.459463506937027, + "logits/rejected": -0.42943331599235535, + "logps/chosen": -0.7901928424835205, + "logps/rejected": -0.7964221239089966, + "loss": 1.4057, + "rewards/accuracies": 0.484375, + "rewards/chosen": -1.580385684967041, + "rewards/margins": 0.012458762153983116, + "rewards/rejected": -1.5928442478179932, + "step": 64 + }, + { + "epoch": 0.13742979848034356, + "grad_norm": 0.37883859872817993, + "learning_rate": 9.960387559479725e-07, + "logits/chosen": -0.4447207450866699, + "logits/rejected": -0.371269553899765, + "logps/chosen": -0.7863065004348755, + "logps/rejected": -0.7983666658401489, + "loss": 1.4202, + "rewards/accuracies": 0.4609375, + "rewards/chosen": -1.572613000869751, + "rewards/margins": 0.02412020042538643, + "rewards/rejected": -1.5967333316802979, + "step": 65 + }, + { + "epoch": 0.13954410307234885, + "grad_norm": 0.31330156326293945, + "learning_rate": 9.955597317564703e-07, + "logits/chosen": -0.42059677839279175, + "logits/rejected": -0.37605100870132446, + "logps/chosen": -0.7669360637664795, + "logps/rejected": -0.8348797559738159, + "loss": 1.3368, + "rewards/accuracies": 0.5390625, + "rewards/chosen": -1.533872127532959, + "rewards/margins": 0.13588732481002808, + "rewards/rejected": -1.6697595119476318, + "step": 66 + }, + { + "epoch": 0.14165840766435414, + "grad_norm": 0.4353170096874237, + "learning_rate": 9.950535017302983e-07, + "logits/chosen": -0.3897082805633545, + "logits/rejected": -0.38229796290397644, + "logps/chosen": -0.7249190807342529, + "logps/rejected": -0.7696882486343384, + "loss": 1.3511, + "rewards/accuracies": 0.5546875, + "rewards/chosen": -1.4498381614685059, + "rewards/margins": 0.08953814208507538, + "rewards/rejected": -1.5393764972686768, + "step": 67 + }, + { + "epoch": 0.14377271225635943, + "grad_norm": 0.6724106669425964, + "learning_rate": 9.94520093661082e-07, + "logits/chosen": -0.3687596023082733, + "logits/rejected": -0.34222811460494995, + "logps/chosen": -0.7845972776412964, + "logps/rejected": -0.8308086395263672, + "loss": 1.3737, + "rewards/accuracies": 0.5, + "rewards/chosen": -1.5691945552825928, + "rewards/margins": 0.09242270141839981, + "rewards/rejected": -1.6616172790527344, + "step": 68 + }, + { + "epoch": 0.14588701684836472, + "grad_norm": 0.7312172651290894, + "learning_rate": 9.939595368324994e-07, + "logits/chosen": -0.4475817382335663, + "logits/rejected": -0.3975730538368225, + "logps/chosen": -0.7314785718917847, + "logps/rejected": -0.7924487590789795, + "loss": 1.3439, + "rewards/accuracies": 0.5390625, + "rewards/chosen": -1.4629571437835693, + "rewards/margins": 0.12194043397903442, + "rewards/rejected": -1.584897518157959, + "step": 69 + }, + { + "epoch": 0.14800132144037, + "grad_norm": 0.301097571849823, + "learning_rate": 9.933718620186744e-07, + "logits/chosen": -0.402032732963562, + "logits/rejected": -0.3640722632408142, + "logps/chosen": -0.7727882862091064, + "logps/rejected": -0.8291516304016113, + "loss": 1.358, + "rewards/accuracies": 0.5546875, + "rewards/chosen": -1.545576572418213, + "rewards/margins": 0.11272668838500977, + "rewards/rejected": -1.6583032608032227, + "step": 70 + }, + { + "epoch": 0.1501156260323753, + "grad_norm": 0.377835750579834, + "learning_rate": 9.92757101482486e-07, + "logits/chosen": -0.316825270652771, + "logits/rejected": -0.3245603144168854, + "logps/chosen": -0.7962774634361267, + "logps/rejected": -0.8610175848007202, + "loss": 1.3464, + "rewards/accuracies": 0.5859375, + "rewards/chosen": -1.5925549268722534, + "rewards/margins": 0.12948019802570343, + "rewards/rejected": -1.7220351696014404, + "step": 71 + }, + { + "epoch": 0.1522299306243806, + "grad_norm": 0.84058678150177, + "learning_rate": 9.921152889737984e-07, + "logits/chosen": -0.4446060359477997, + "logits/rejected": -0.43160340189933777, + "logps/chosen": -0.7745426297187805, + "logps/rejected": -0.8286185264587402, + "loss": 1.3408, + "rewards/accuracies": 0.5703125, + "rewards/chosen": -1.549085259437561, + "rewards/margins": 0.10815180093050003, + "rewards/rejected": -1.6572370529174805, + "step": 72 + }, + { + "epoch": 0.15434423521638585, + "grad_norm": 0.6970808506011963, + "learning_rate": 9.91446459727607e-07, + "logits/chosen": -0.4220297634601593, + "logits/rejected": -0.404453307390213, + "logps/chosen": -0.8769615888595581, + "logps/rejected": -0.9564313292503357, + "loss": 1.3423, + "rewards/accuracies": 0.6328125, + "rewards/chosen": -1.7539231777191162, + "rewards/margins": 0.15893957018852234, + "rewards/rejected": -1.9128626585006714, + "step": 73 + }, + { + "epoch": 0.15645853980839114, + "grad_norm": 0.5900676250457764, + "learning_rate": 9.90750650462105e-07, + "logits/chosen": -0.41884100437164307, + "logits/rejected": -0.38551777601242065, + "logps/chosen": -0.814996063709259, + "logps/rejected": -0.8892688751220703, + "loss": 1.3325, + "rewards/accuracies": 0.5546875, + "rewards/chosen": -1.629992127418518, + "rewards/margins": 0.14854571223258972, + "rewards/rejected": -1.7785377502441406, + "step": 74 + }, + { + "epoch": 0.15857284440039643, + "grad_norm": 0.7245749831199646, + "learning_rate": 9.900278993766668e-07, + "logits/chosen": -0.3451727330684662, + "logits/rejected": -0.3348972201347351, + "logps/chosen": -0.8788102865219116, + "logps/rejected": -0.9273182153701782, + "loss": 1.377, + "rewards/accuracies": 0.5078125, + "rewards/chosen": -1.7576205730438232, + "rewards/margins": 0.09701582789421082, + "rewards/rejected": -1.8546364307403564, + "step": 75 + }, + { + "epoch": 0.16068714899240172, + "grad_norm": 0.3135383725166321, + "learning_rate": 9.89278246149752e-07, + "logits/chosen": -0.4140404760837555, + "logits/rejected": -0.38082340359687805, + "logps/chosen": -0.7513999342918396, + "logps/rejected": -0.7880118489265442, + "loss": 1.3921, + "rewards/accuracies": 0.53125, + "rewards/chosen": -1.5027998685836792, + "rewards/margins": 0.07322371751070023, + "rewards/rejected": -1.5760236978530884, + "step": 76 + }, + { + "epoch": 0.162801453584407, + "grad_norm": 0.8594076633453369, + "learning_rate": 9.885017319367252e-07, + "logits/chosen": -0.35951656103134155, + "logits/rejected": -0.30456626415252686, + "logps/chosen": -0.7989844679832458, + "logps/rejected": -0.8145395517349243, + "loss": 1.4061, + "rewards/accuracies": 0.4765625, + "rewards/chosen": -1.5979689359664917, + "rewards/margins": 0.031110182404518127, + "rewards/rejected": -1.6290791034698486, + "step": 77 + }, + { + "epoch": 0.1649157581764123, + "grad_norm": 0.4393538534641266, + "learning_rate": 9.876983993675989e-07, + "logits/chosen": -0.33119240403175354, + "logits/rejected": -0.31970253586769104, + "logps/chosen": -0.721772313117981, + "logps/rejected": -0.7868390679359436, + "loss": 1.3325, + "rewards/accuracies": 0.5390625, + "rewards/chosen": -1.443544626235962, + "rewards/margins": 0.13013358414173126, + "rewards/rejected": -1.5736781358718872, + "step": 78 + }, + { + "epoch": 0.1670300627684176, + "grad_norm": 0.8017925024032593, + "learning_rate": 9.868682925446909e-07, + "logits/chosen": -0.3608989417552948, + "logits/rejected": -0.3497124910354614, + "logps/chosen": -0.8117240071296692, + "logps/rejected": -0.8506529331207275, + "loss": 1.3671, + "rewards/accuracies": 0.53125, + "rewards/chosen": -1.6234480142593384, + "rewards/margins": 0.07785768806934357, + "rewards/rejected": -1.701305866241455, + "step": 79 + }, + { + "epoch": 0.16914436736042285, + "grad_norm": 0.5704994201660156, + "learning_rate": 9.860114570402054e-07, + "logits/chosen": -0.37441548705101013, + "logits/rejected": -0.32188406586647034, + "logps/chosen": -0.745419442653656, + "logps/rejected": -0.816170871257782, + "loss": 1.3315, + "rewards/accuracies": 0.484375, + "rewards/chosen": -1.490838885307312, + "rewards/margins": 0.1415030062198639, + "rewards/rejected": -1.632341742515564, + "step": 80 + }, + { + "epoch": 0.17125867195242814, + "grad_norm": 3.0005106925964355, + "learning_rate": 9.85127939893729e-07, + "logits/chosen": -0.34791454672813416, + "logits/rejected": -0.32542383670806885, + "logps/chosen": -0.7547991275787354, + "logps/rejected": -0.7868378162384033, + "loss": 1.3742, + "rewards/accuracies": 0.515625, + "rewards/chosen": -1.5095982551574707, + "rewards/margins": 0.06407731771469116, + "rewards/rejected": -1.5736756324768066, + "step": 81 + }, + { + "epoch": 0.17337297654443343, + "grad_norm": 0.5891271829605103, + "learning_rate": 9.842177896096493e-07, + "logits/chosen": -0.38649702072143555, + "logits/rejected": -0.36892226338386536, + "logps/chosen": -0.7556143999099731, + "logps/rejected": -0.82858806848526, + "loss": 1.3233, + "rewards/accuracies": 0.5703125, + "rewards/chosen": -1.5112287998199463, + "rewards/margins": 0.14594702422618866, + "rewards/rejected": -1.65717613697052, + "step": 82 + }, + { + "epoch": 0.17548728113643872, + "grad_norm": 0.27861157059669495, + "learning_rate": 9.832810561544923e-07, + "logits/chosen": -0.38264670968055725, + "logits/rejected": -0.35908499360084534, + "logps/chosen": -0.7858557105064392, + "logps/rejected": -0.8571599721908569, + "loss": 1.3234, + "rewards/accuracies": 0.578125, + "rewards/chosen": -1.5717114210128784, + "rewards/margins": 0.1426086127758026, + "rewards/rejected": -1.7143199443817139, + "step": 83 + }, + { + "epoch": 0.177601585728444, + "grad_norm": 0.8158763647079468, + "learning_rate": 9.823177909541793e-07, + "logits/chosen": -0.4076104760169983, + "logits/rejected": -0.3934200704097748, + "logps/chosen": -0.8089872002601624, + "logps/rejected": -0.8885407447814941, + "loss": 1.3476, + "rewards/accuracies": 0.5390625, + "rewards/chosen": -1.6179744005203247, + "rewards/margins": 0.15910708904266357, + "rewards/rejected": -1.7770814895629883, + "step": 84 + }, + { + "epoch": 0.1797158903204493, + "grad_norm": 0.46008333563804626, + "learning_rate": 9.813280468912022e-07, + "logits/chosen": -0.33124151825904846, + "logits/rejected": -0.34535717964172363, + "logps/chosen": -0.733020544052124, + "logps/rejected": -0.8716557621955872, + "loss": 1.2807, + "rewards/accuracies": 0.5859375, + "rewards/chosen": -1.466041088104248, + "rewards/margins": 0.27727028727531433, + "rewards/rejected": -1.7433115243911743, + "step": 85 + }, + { + "epoch": 0.18183019491245458, + "grad_norm": 0.3784334659576416, + "learning_rate": 9.80311878301722e-07, + "logits/chosen": -0.40713849663734436, + "logits/rejected": -0.3808574080467224, + "logps/chosen": -0.7063947319984436, + "logps/rejected": -0.7589148879051208, + "loss": 1.3501, + "rewards/accuracies": 0.515625, + "rewards/chosen": -1.4127894639968872, + "rewards/margins": 0.10504024475812912, + "rewards/rejected": -1.5178297758102417, + "step": 86 + }, + { + "epoch": 0.18394449950445987, + "grad_norm": 0.6137813329696655, + "learning_rate": 9.792693409725853e-07, + "logits/chosen": -0.4119255542755127, + "logits/rejected": -0.44221603870391846, + "logps/chosen": -0.795850932598114, + "logps/rejected": -0.8925026059150696, + "loss": 1.2987, + "rewards/accuracies": 0.5859375, + "rewards/chosen": -1.591701865196228, + "rewards/margins": 0.19330324232578278, + "rewards/rejected": -1.7850052118301392, + "step": 87 + }, + { + "epoch": 0.18605880409646514, + "grad_norm": 0.5354250073432922, + "learning_rate": 9.78200492138261e-07, + "logits/chosen": -0.3792279064655304, + "logits/rejected": -0.3789527714252472, + "logps/chosen": -0.7249161005020142, + "logps/rejected": -0.8088154792785645, + "loss": 1.3304, + "rewards/accuracies": 0.515625, + "rewards/chosen": -1.4498322010040283, + "rewards/margins": 0.16779886186122894, + "rewards/rejected": -1.617630958557129, + "step": 88 + }, + { + "epoch": 0.18817310868847043, + "grad_norm": 0.6911243796348572, + "learning_rate": 9.771053904776995e-07, + "logits/chosen": -0.38837429881095886, + "logits/rejected": -0.36597418785095215, + "logps/chosen": -0.7528612017631531, + "logps/rejected": -0.7981135249137878, + "loss": 1.3481, + "rewards/accuracies": 0.6171875, + "rewards/chosen": -1.5057224035263062, + "rewards/margins": 0.09050464630126953, + "rewards/rejected": -1.5962270498275757, + "step": 89 + }, + { + "epoch": 0.19028741328047571, + "grad_norm": 0.37110790610313416, + "learning_rate": 9.759840961111097e-07, + "logits/chosen": -0.3804919421672821, + "logits/rejected": -0.38750600814819336, + "logps/chosen": -0.8673248291015625, + "logps/rejected": -0.9381619691848755, + "loss": 1.3303, + "rewards/accuracies": 0.5859375, + "rewards/chosen": -1.734649658203125, + "rewards/margins": 0.14167429506778717, + "rewards/rejected": -1.876323938369751, + "step": 90 + }, + { + "epoch": 0.192401717872481, + "grad_norm": 0.8033086657524109, + "learning_rate": 9.748366705966593e-07, + "logits/chosen": -0.3804866075515747, + "logits/rejected": -0.31055447459220886, + "logps/chosen": -0.7535511255264282, + "logps/rejected": -0.7824290990829468, + "loss": 1.3706, + "rewards/accuracies": 0.546875, + "rewards/chosen": -1.5071022510528564, + "rewards/margins": 0.057755980640649796, + "rewards/rejected": -1.5648581981658936, + "step": 91 + }, + { + "epoch": 0.1945160224644863, + "grad_norm": 0.6934167742729187, + "learning_rate": 9.736631769270957e-07, + "logits/chosen": -0.443461149930954, + "logits/rejected": -0.4398806691169739, + "logps/chosen": -0.8123858571052551, + "logps/rejected": -0.8972252607345581, + "loss": 1.3464, + "rewards/accuracies": 0.578125, + "rewards/chosen": -1.6247717142105103, + "rewards/margins": 0.16967862844467163, + "rewards/rejected": -1.7944505214691162, + "step": 92 + }, + { + "epoch": 0.19663032705649158, + "grad_norm": 1.140067458152771, + "learning_rate": 9.724636795262866e-07, + "logits/chosen": -0.43793433904647827, + "logits/rejected": -0.4402340352535248, + "logps/chosen": -0.8155819177627563, + "logps/rejected": -0.8659977912902832, + "loss": 1.3621, + "rewards/accuracies": 0.5, + "rewards/chosen": -1.6311638355255127, + "rewards/margins": 0.10083187371492386, + "rewards/rejected": -1.7319955825805664, + "step": 93 + }, + { + "epoch": 0.19874463164849687, + "grad_norm": 1.8303897380828857, + "learning_rate": 9.712382442456844e-07, + "logits/chosen": -0.34288379549980164, + "logits/rejected": -0.36632782220840454, + "logps/chosen": -0.7338054776191711, + "logps/rejected": -0.8537961840629578, + "loss": 1.2942, + "rewards/accuracies": 0.5859375, + "rewards/chosen": -1.4676109552383423, + "rewards/margins": 0.23998141288757324, + "rewards/rejected": -1.7075923681259155, + "step": 94 + }, + { + "epoch": 0.20085893624050213, + "grad_norm": 0.34392252564430237, + "learning_rate": 9.6998693836071e-07, + "logits/chosen": -0.4381723999977112, + "logits/rejected": -0.4031081199645996, + "logps/chosen": -0.7130292057991028, + "logps/rejected": -0.7402217388153076, + "loss": 1.3766, + "rewards/accuracies": 0.5859375, + "rewards/chosen": -1.4260584115982056, + "rewards/margins": 0.0543849840760231, + "rewards/rejected": -1.4804434776306152, + "step": 95 + }, + { + "epoch": 0.20297324083250742, + "grad_norm": 0.4129842519760132, + "learning_rate": 9.687098305670604e-07, + "logits/chosen": -0.39796924591064453, + "logits/rejected": -0.3476859927177429, + "logps/chosen": -0.7520885467529297, + "logps/rejected": -0.8058558702468872, + "loss": 1.3663, + "rewards/accuracies": 0.53125, + "rewards/chosen": -1.5041770935058594, + "rewards/margins": 0.10753461718559265, + "rewards/rejected": -1.6117117404937744, + "step": 96 + }, + { + "epoch": 0.2050875454245127, + "grad_norm": 0.5054985284805298, + "learning_rate": 9.674069909769362e-07, + "logits/chosen": -0.3942393660545349, + "logits/rejected": -0.3627544045448303, + "logps/chosen": -0.733702540397644, + "logps/rejected": -0.781308650970459, + "loss": 1.342, + "rewards/accuracies": 0.5859375, + "rewards/chosen": -1.467405080795288, + "rewards/margins": 0.09521210938692093, + "rewards/rejected": -1.562617301940918, + "step": 97 + }, + { + "epoch": 0.207201850016518, + "grad_norm": 0.6975870728492737, + "learning_rate": 9.66078491115194e-07, + "logits/chosen": -0.38557127118110657, + "logits/rejected": -0.3581204414367676, + "logps/chosen": -0.7359838485717773, + "logps/rejected": -0.7648134827613831, + "loss": 1.3841, + "rewards/accuracies": 0.5234375, + "rewards/chosen": -1.4719676971435547, + "rewards/margins": 0.057659298181533813, + "rewards/rejected": -1.5296269655227661, + "step": 98 + }, + { + "epoch": 0.2093161546085233, + "grad_norm": 0.7575029730796814, + "learning_rate": 9.647244039154177e-07, + "logits/chosen": -0.3871467411518097, + "logits/rejected": -0.3941374123096466, + "logps/chosen": -0.6516871452331543, + "logps/rejected": -0.7066073417663574, + "loss": 1.3364, + "rewards/accuracies": 0.5390625, + "rewards/chosen": -1.3033742904663086, + "rewards/margins": 0.10984040796756744, + "rewards/rejected": -1.4132146835327148, + "step": 99 + }, + { + "epoch": 0.21143045920052858, + "grad_norm": 1.3344250917434692, + "learning_rate": 9.633448037159166e-07, + "logits/chosen": -0.40887755155563354, + "logits/rejected": -0.41733911633491516, + "logps/chosen": -0.6978950500488281, + "logps/rejected": -0.793424129486084, + "loss": 1.3076, + "rewards/accuracies": 0.609375, + "rewards/chosen": -1.3957901000976562, + "rewards/margins": 0.1910584717988968, + "rewards/rejected": -1.586848258972168, + "step": 100 + }, + { + "epoch": 0.21354476379253387, + "grad_norm": 0.8798456788063049, + "learning_rate": 9.619397662556433e-07, + "logits/chosen": -0.302534282207489, + "logits/rejected": -0.29954588413238525, + "logps/chosen": -0.719552755355835, + "logps/rejected": -0.7628123164176941, + "loss": 1.3699, + "rewards/accuracies": 0.5390625, + "rewards/chosen": -1.43910551071167, + "rewards/margins": 0.08651915192604065, + "rewards/rejected": -1.5256246328353882, + "step": 101 + }, + { + "epoch": 0.21565906838453916, + "grad_norm": 0.8746365308761597, + "learning_rate": 9.605093686700353e-07, + "logits/chosen": -0.372263640165329, + "logits/rejected": -0.3714321255683899, + "logps/chosen": -0.6665956974029541, + "logps/rejected": -0.7361368536949158, + "loss": 1.3173, + "rewards/accuracies": 0.5546875, + "rewards/chosen": -1.3331913948059082, + "rewards/margins": 0.1390824019908905, + "rewards/rejected": -1.4722737073898315, + "step": 102 + }, + { + "epoch": 0.21777337297654442, + "grad_norm": 0.8450930714607239, + "learning_rate": 9.590536894867812e-07, + "logits/chosen": -0.37228280305862427, + "logits/rejected": -0.37763556838035583, + "logps/chosen": -0.7425979375839233, + "logps/rejected": -0.7557005882263184, + "loss": 1.4085, + "rewards/accuracies": 0.5390625, + "rewards/chosen": -1.4851958751678467, + "rewards/margins": 0.026205357164144516, + "rewards/rejected": -1.5114011764526367, + "step": 103 + }, + { + "epoch": 0.2198876775685497, + "grad_norm": 0.5075035691261292, + "learning_rate": 9.575728086215091e-07, + "logits/chosen": -0.4433964788913727, + "logits/rejected": -0.3782787024974823, + "logps/chosen": -0.7308244109153748, + "logps/rejected": -0.8043883442878723, + "loss": 1.3353, + "rewards/accuracies": 0.5703125, + "rewards/chosen": -1.4616488218307495, + "rewards/margins": 0.14712783694267273, + "rewards/rejected": -1.6087766885757446, + "step": 104 + }, + { + "epoch": 0.222001982160555, + "grad_norm": 1.0270946025848389, + "learning_rate": 9.560668073733993e-07, + "logits/chosen": -0.3593980073928833, + "logits/rejected": -0.3159312903881073, + "logps/chosen": -0.757469892501831, + "logps/rejected": -0.8256179094314575, + "loss": 1.3289, + "rewards/accuracies": 0.5546875, + "rewards/chosen": -1.514939785003662, + "rewards/margins": 0.13629598915576935, + "rewards/rejected": -1.651235818862915, + "step": 105 + }, + { + "epoch": 0.2241162867525603, + "grad_norm": 0.8390078544616699, + "learning_rate": 9.54535768420721e-07, + "logits/chosen": -0.3266332149505615, + "logits/rejected": -0.3008713722229004, + "logps/chosen": -0.7286102771759033, + "logps/rejected": -0.7803273797035217, + "loss": 1.3593, + "rewards/accuracies": 0.5078125, + "rewards/chosen": -1.4572205543518066, + "rewards/margins": 0.10343428701162338, + "rewards/rejected": -1.5606547594070435, + "step": 106 + }, + { + "epoch": 0.22623059134456558, + "grad_norm": 1.130595088005066, + "learning_rate": 9.529797758162934e-07, + "logits/chosen": -0.36109817028045654, + "logits/rejected": -0.34797021746635437, + "logps/chosen": -0.7723361253738403, + "logps/rejected": -0.8873662352561951, + "loss": 1.2956, + "rewards/accuracies": 0.625, + "rewards/chosen": -1.5446722507476807, + "rewards/margins": 0.23006024956703186, + "rewards/rejected": -1.7747324705123901, + "step": 107 + }, + { + "epoch": 0.22834489593657087, + "grad_norm": 0.9399718642234802, + "learning_rate": 9.513989149828717e-07, + "logits/chosen": -0.3596777021884918, + "logits/rejected": -0.3660539388656616, + "logps/chosen": -0.7130635976791382, + "logps/rejected": -0.7378955483436584, + "loss": 1.3774, + "rewards/accuracies": 0.4765625, + "rewards/chosen": -1.4261271953582764, + "rewards/margins": 0.04966379329562187, + "rewards/rejected": -1.475791096687317, + "step": 108 + }, + { + "epoch": 0.23045920052857616, + "grad_norm": 1.097594976425171, + "learning_rate": 9.49793272708457e-07, + "logits/chosen": -0.31783169507980347, + "logits/rejected": -0.3008044362068176, + "logps/chosen": -0.6933202147483826, + "logps/rejected": -0.7510000467300415, + "loss": 1.3498, + "rewards/accuracies": 0.5390625, + "rewards/chosen": -1.3866404294967651, + "rewards/margins": 0.1153596043586731, + "rewards/rejected": -1.502000093460083, + "step": 109 + }, + { + "epoch": 0.23257350512058145, + "grad_norm": 1.096330165863037, + "learning_rate": 9.481629371415313e-07, + "logits/chosen": -0.3582899570465088, + "logits/rejected": -0.3120020031929016, + "logps/chosen": -0.817268431186676, + "logps/rejected": -0.8862374424934387, + "loss": 1.3349, + "rewards/accuracies": 0.5625, + "rewards/chosen": -1.634536862373352, + "rewards/margins": 0.13793781399726868, + "rewards/rejected": -1.7724748849868774, + "step": 110 + }, + { + "epoch": 0.2346878097125867, + "grad_norm": 0.8261978626251221, + "learning_rate": 9.465079977862192e-07, + "logits/chosen": -0.41336673498153687, + "logits/rejected": -0.39544352889060974, + "logps/chosen": -0.7673372030258179, + "logps/rejected": -0.8331737518310547, + "loss": 1.3373, + "rewards/accuracies": 0.59375, + "rewards/chosen": -1.5346744060516357, + "rewards/margins": 0.1316729635000229, + "rewards/rejected": -1.6663475036621094, + "step": 111 + }, + { + "epoch": 0.236802114304592, + "grad_norm": 0.5922806262969971, + "learning_rate": 9.448285454973737e-07, + "logits/chosen": -0.3224758207798004, + "logits/rejected": -0.3118049204349518, + "logps/chosen": -0.7584627866744995, + "logps/rejected": -0.8859898447990417, + "loss": 1.2731, + "rewards/accuracies": 0.625, + "rewards/chosen": -1.516925573348999, + "rewards/margins": 0.2550540566444397, + "rewards/rejected": -1.7719796895980835, + "step": 112 + }, + { + "epoch": 0.2389164188965973, + "grad_norm": 1.3172541856765747, + "learning_rate": 9.431246724755877e-07, + "logits/chosen": -0.4287208318710327, + "logits/rejected": -0.3984590172767639, + "logps/chosen": -0.7587048411369324, + "logps/rejected": -0.7860502004623413, + "loss": 1.3832, + "rewards/accuracies": 0.4765625, + "rewards/chosen": -1.5174096822738647, + "rewards/margins": 0.05469079315662384, + "rewards/rejected": -1.5721004009246826, + "step": 113 + }, + { + "epoch": 0.24103072348860258, + "grad_norm": 0.7749882340431213, + "learning_rate": 9.413964722621337e-07, + "logits/chosen": -0.39085906744003296, + "logits/rejected": -0.3316206932067871, + "logps/chosen": -0.7035898566246033, + "logps/rejected": -0.7375759482383728, + "loss": 1.387, + "rewards/accuracies": 0.5546875, + "rewards/chosen": -1.4071797132492065, + "rewards/margins": 0.06797221302986145, + "rewards/rejected": -1.4751518964767456, + "step": 114 + }, + { + "epoch": 0.24314502808060787, + "grad_norm": 1.0914056301116943, + "learning_rate": 9.396440397338272e-07, + "logits/chosen": -0.38826486468315125, + "logits/rejected": -0.35520774126052856, + "logps/chosen": -0.7385872602462769, + "logps/rejected": -0.7974889278411865, + "loss": 1.3477, + "rewards/accuracies": 0.5, + "rewards/chosen": -1.4771745204925537, + "rewards/margins": 0.11780343949794769, + "rewards/rejected": -1.594977855682373, + "step": 115 + }, + { + "epoch": 0.24525933267261316, + "grad_norm": 1.2966018915176392, + "learning_rate": 9.378674710978183e-07, + "logits/chosen": -0.36493802070617676, + "logits/rejected": -0.34763696789741516, + "logps/chosen": -0.6731826663017273, + "logps/rejected": -0.7645149827003479, + "loss": 1.3, + "rewards/accuracies": 0.5859375, + "rewards/chosen": -1.3463653326034546, + "rewards/margins": 0.18266455829143524, + "rewards/rejected": -1.5290299654006958, + "step": 116 + }, + { + "epoch": 0.24737363726461845, + "grad_norm": 0.49401605129241943, + "learning_rate": 9.360668638863109e-07, + "logits/chosen": -0.40416795015335083, + "logits/rejected": -0.3815993070602417, + "logps/chosen": -0.719497799873352, + "logps/rejected": -0.7588324546813965, + "loss": 1.3621, + "rewards/accuracies": 0.5625, + "rewards/chosen": -1.438995599746704, + "rewards/margins": 0.07866920530796051, + "rewards/rejected": -1.517664909362793, + "step": 117 + }, + { + "epoch": 0.2494879418566237, + "grad_norm": 1.0603238344192505, + "learning_rate": 9.342423169512071e-07, + "logits/chosen": -0.3857055604457855, + "logits/rejected": -0.3524513244628906, + "logps/chosen": -0.7373769283294678, + "logps/rejected": -0.7971038818359375, + "loss": 1.3358, + "rewards/accuracies": 0.59375, + "rewards/chosen": -1.4747538566589355, + "rewards/margins": 0.11945393681526184, + "rewards/rejected": -1.594207763671875, + "step": 118 + }, + { + "epoch": 0.251602246448629, + "grad_norm": 0.9880490303039551, + "learning_rate": 9.323939304586804e-07, + "logits/chosen": -0.31455785036087036, + "logits/rejected": -0.3102484941482544, + "logps/chosen": -0.7276102900505066, + "logps/rejected": -0.7446941137313843, + "loss": 1.3928, + "rewards/accuracies": 0.546875, + "rewards/chosen": -1.4552205801010132, + "rewards/margins": 0.034167706966400146, + "rewards/rejected": -1.4893882274627686, + "step": 119 + }, + { + "epoch": 0.2537165510406343, + "grad_norm": 1.0419566631317139, + "learning_rate": 9.305218058836776e-07, + "logits/chosen": -0.38093918561935425, + "logits/rejected": -0.3588898181915283, + "logps/chosen": -0.715582013130188, + "logps/rejected": -0.8271002769470215, + "loss": 1.2934, + "rewards/accuracies": 0.6015625, + "rewards/chosen": -1.431164026260376, + "rewards/margins": 0.22303667664527893, + "rewards/rejected": -1.654200553894043, + "step": 120 + }, + { + "epoch": 0.2558308556326396, + "grad_norm": 0.657620370388031, + "learning_rate": 9.286260460043473e-07, + "logits/chosen": -0.45690783858299255, + "logits/rejected": -0.4082674980163574, + "logps/chosen": -0.6932571530342102, + "logps/rejected": -0.7631082534790039, + "loss": 1.3398, + "rewards/accuracies": 0.5546875, + "rewards/chosen": -1.3865143060684204, + "rewards/margins": 0.13970226049423218, + "rewards/rejected": -1.5262165069580078, + "step": 121 + }, + { + "epoch": 0.25794516022464486, + "grad_norm": 0.983686089515686, + "learning_rate": 9.267067548963974e-07, + "logits/chosen": -0.40266987681388855, + "logits/rejected": -0.37586671113967896, + "logps/chosen": -0.7362720966339111, + "logps/rejected": -0.7538987398147583, + "loss": 1.4066, + "rewards/accuracies": 0.5390625, + "rewards/chosen": -1.4725441932678223, + "rewards/margins": 0.03525342047214508, + "rewards/rejected": -1.5077974796295166, + "step": 122 + }, + { + "epoch": 0.26005946481665015, + "grad_norm": 1.0076361894607544, + "learning_rate": 9.24764037927381e-07, + "logits/chosen": -0.4461461007595062, + "logits/rejected": -0.40700826048851013, + "logps/chosen": -0.7206646800041199, + "logps/rejected": -0.7489192485809326, + "loss": 1.3759, + "rewards/accuracies": 0.5625, + "rewards/chosen": -1.4413293600082397, + "rewards/margins": 0.05650928616523743, + "rewards/rejected": -1.4978384971618652, + "step": 123 + }, + { + "epoch": 0.26217376940865544, + "grad_norm": 0.933315098285675, + "learning_rate": 9.22798001750913e-07, + "logits/chosen": -0.3966676890850067, + "logits/rejected": -0.3572196960449219, + "logps/chosen": -0.7075096368789673, + "logps/rejected": -0.7406759262084961, + "loss": 1.3667, + "rewards/accuracies": 0.6171875, + "rewards/chosen": -1.4150192737579346, + "rewards/margins": 0.06633266806602478, + "rewards/rejected": -1.4813518524169922, + "step": 124 + }, + { + "epoch": 0.26428807400066073, + "grad_norm": 0.6277392506599426, + "learning_rate": 9.20808754300814e-07, + "logits/chosen": -0.3555490970611572, + "logits/rejected": -0.35786163806915283, + "logps/chosen": -0.7549921274185181, + "logps/rejected": -0.832869291305542, + "loss": 1.3175, + "rewards/accuracies": 0.5546875, + "rewards/chosen": -1.5099842548370361, + "rewards/margins": 0.15575438737869263, + "rewards/rejected": -1.665738582611084, + "step": 125 + }, + { + "epoch": 0.266402378592666, + "grad_norm": 0.7172744274139404, + "learning_rate": 9.18796404785185e-07, + "logits/chosen": -0.41230690479278564, + "logits/rejected": -0.39935630559921265, + "logps/chosen": -0.7129833698272705, + "logps/rejected": -0.7888559103012085, + "loss": 1.3167, + "rewards/accuracies": 0.5625, + "rewards/chosen": -1.425966739654541, + "rewards/margins": 0.15174514055252075, + "rewards/rejected": -1.577711820602417, + "step": 126 + }, + { + "epoch": 0.2685166831846713, + "grad_norm": 1.2629508972167969, + "learning_rate": 9.16761063680412e-07, + "logits/chosen": -0.36754000186920166, + "logits/rejected": -0.3541562259197235, + "logps/chosen": -0.6992133855819702, + "logps/rejected": -0.7668892741203308, + "loss": 1.3735, + "rewards/accuracies": 0.578125, + "rewards/chosen": -1.3984267711639404, + "rewards/margins": 0.13535188138484955, + "rewards/rejected": -1.5337785482406616, + "step": 127 + }, + { + "epoch": 0.2706309877766766, + "grad_norm": 0.7024405598640442, + "learning_rate": 9.147028427251009e-07, + "logits/chosen": -0.4014585018157959, + "logits/rejected": -0.40560898184776306, + "logps/chosen": -0.727234959602356, + "logps/rejected": -0.8070081472396851, + "loss": 1.3138, + "rewards/accuracies": 0.625, + "rewards/chosen": -1.454469919204712, + "rewards/margins": 0.15954652428627014, + "rewards/rejected": -1.6140162944793701, + "step": 128 + }, + { + "epoch": 0.2727452923686819, + "grad_norm": 1.6173532009124756, + "learning_rate": 9.126218549139433e-07, + "logits/chosen": -0.32572367787361145, + "logits/rejected": -0.3470613956451416, + "logps/chosen": -0.7555541396141052, + "logps/rejected": -0.8856738209724426, + "loss": 1.2461, + "rewards/accuracies": 0.6796875, + "rewards/chosen": -1.5111082792282104, + "rewards/margins": 0.26023951172828674, + "rewards/rejected": -1.7713476419448853, + "step": 129 + }, + { + "epoch": 0.2748595969606871, + "grad_norm": 0.5878487229347229, + "learning_rate": 9.105182144915129e-07, + "logits/chosen": -0.39267170429229736, + "logits/rejected": -0.3448992967605591, + "logps/chosen": -0.6776289343833923, + "logps/rejected": -0.7530183792114258, + "loss": 1.3242, + "rewards/accuracies": 0.609375, + "rewards/chosen": -1.3552578687667847, + "rewards/margins": 0.15077897906303406, + "rewards/rejected": -1.5060367584228516, + "step": 130 + }, + { + "epoch": 0.2769739015526924, + "grad_norm": 0.43264809250831604, + "learning_rate": 9.08392036945994e-07, + "logits/chosen": -0.39980950951576233, + "logits/rejected": -0.4247930645942688, + "logps/chosen": -0.7898982167243958, + "logps/rejected": -0.8856299519538879, + "loss": 1.3004, + "rewards/accuracies": 0.59375, + "rewards/chosen": -1.5797964334487915, + "rewards/margins": 0.19146347045898438, + "rewards/rejected": -1.7712599039077759, + "step": 131 + }, + { + "epoch": 0.2790882061446977, + "grad_norm": 1.0348538160324097, + "learning_rate": 9.062434390028407e-07, + "logits/chosen": -0.35729700326919556, + "logits/rejected": -0.3265542984008789, + "logps/chosen": -0.7120587229728699, + "logps/rejected": -0.771691083908081, + "loss": 1.3374, + "rewards/accuracies": 0.59375, + "rewards/chosen": -1.4241174459457397, + "rewards/margins": 0.11926469206809998, + "rewards/rejected": -1.543382167816162, + "step": 132 + }, + { + "epoch": 0.281202510736703, + "grad_norm": 2.0902225971221924, + "learning_rate": 9.04072538618369e-07, + "logits/chosen": -0.4942469298839569, + "logits/rejected": -0.48699846863746643, + "logps/chosen": -0.7882512211799622, + "logps/rejected": -0.8270165920257568, + "loss": 1.3715, + "rewards/accuracies": 0.5703125, + "rewards/chosen": -1.5765024423599243, + "rewards/margins": 0.07753071188926697, + "rewards/rejected": -1.6540331840515137, + "step": 133 + }, + { + "epoch": 0.2833168153287083, + "grad_norm": 1.6436113119125366, + "learning_rate": 9.018794549732817e-07, + "logits/chosen": -0.41133156418800354, + "logits/rejected": -0.4146718382835388, + "logps/chosen": -0.779824435710907, + "logps/rejected": -0.9421006441116333, + "loss": 1.2521, + "rewards/accuracies": 0.625, + "rewards/chosen": -1.559648871421814, + "rewards/margins": 0.324552446603775, + "rewards/rejected": -1.8842012882232666, + "step": 134 + }, + { + "epoch": 0.28543111992071357, + "grad_norm": 0.8831859827041626, + "learning_rate": 8.996643084661244e-07, + "logits/chosen": -0.42452165484428406, + "logits/rejected": -0.3798604905605316, + "logps/chosen": -0.6499216556549072, + "logps/rejected": -0.7796702980995178, + "loss": 1.2581, + "rewards/accuracies": 0.625, + "rewards/chosen": -1.2998433113098145, + "rewards/margins": 0.25949734449386597, + "rewards/rejected": -1.5593405961990356, + "step": 135 + }, + { + "epoch": 0.28754542451271886, + "grad_norm": 0.8031218647956848, + "learning_rate": 8.974272207066767e-07, + "logits/chosen": -0.38131940364837646, + "logits/rejected": -0.3854255676269531, + "logps/chosen": -0.7026851773262024, + "logps/rejected": -0.762391209602356, + "loss": 1.3333, + "rewards/accuracies": 0.5625, + "rewards/chosen": -1.4053703546524048, + "rewards/margins": 0.11941206455230713, + "rewards/rejected": -1.524782419204712, + "step": 136 + }, + { + "epoch": 0.28965972910472415, + "grad_norm": 1.4455821514129639, + "learning_rate": 8.951683145092748e-07, + "logits/chosen": -0.42824965715408325, + "logits/rejected": -0.4320424795150757, + "logps/chosen": -0.7893270254135132, + "logps/rejected": -0.8517144322395325, + "loss": 1.3652, + "rewards/accuracies": 0.5234375, + "rewards/chosen": -1.5786540508270264, + "rewards/margins": 0.12477481365203857, + "rewards/rejected": -1.703428864479065, + "step": 137 + }, + { + "epoch": 0.29177403369672944, + "grad_norm": 0.6299450397491455, + "learning_rate": 8.928877138860706e-07, + "logits/chosen": -0.4388589560985565, + "logits/rejected": -0.40156903862953186, + "logps/chosen": -0.7346572875976562, + "logps/rejected": -0.8166492581367493, + "loss": 1.3134, + "rewards/accuracies": 0.5703125, + "rewards/chosen": -1.4693145751953125, + "rewards/margins": 0.16398391127586365, + "rewards/rejected": -1.6332985162734985, + "step": 138 + }, + { + "epoch": 0.29388833828873473, + "grad_norm": 2.784437417984009, + "learning_rate": 8.905855440402224e-07, + "logits/chosen": -0.405662477016449, + "logits/rejected": -0.35549795627593994, + "logps/chosen": -0.7482771277427673, + "logps/rejected": -0.795568585395813, + "loss": 1.3656, + "rewards/accuracies": 0.578125, + "rewards/chosen": -1.4965542554855347, + "rewards/margins": 0.09458285570144653, + "rewards/rejected": -1.591137170791626, + "step": 139 + }, + { + "epoch": 0.29600264288074, + "grad_norm": 0.4958692193031311, + "learning_rate": 8.882619313590212e-07, + "logits/chosen": -0.3814452886581421, + "logits/rejected": -0.35715553164482117, + "logps/chosen": -0.7731542587280273, + "logps/rejected": -0.8285202980041504, + "loss": 1.3776, + "rewards/accuracies": 0.515625, + "rewards/chosen": -1.5463085174560547, + "rewards/margins": 0.11073210835456848, + "rewards/rejected": -1.6570405960083008, + "step": 140 + }, + { + "epoch": 0.2981169474727453, + "grad_norm": 0.4597362279891968, + "learning_rate": 8.859170034069532e-07, + "logits/chosen": -0.388383150100708, + "logits/rejected": -0.4071737229824066, + "logps/chosen": -0.7263504266738892, + "logps/rejected": -0.769676148891449, + "loss": 1.3712, + "rewards/accuracies": 0.53125, + "rewards/chosen": -1.4527008533477783, + "rewards/margins": 0.08665145933628082, + "rewards/rejected": -1.539352297782898, + "step": 141 + }, + { + "epoch": 0.3002312520647506, + "grad_norm": 0.4914930760860443, + "learning_rate": 8.835508889186956e-07, + "logits/chosen": -0.41084378957748413, + "logits/rejected": -0.3823031187057495, + "logps/chosen": -0.7565821409225464, + "logps/rejected": -0.9084322452545166, + "loss": 1.2717, + "rewards/accuracies": 0.578125, + "rewards/chosen": -1.5131642818450928, + "rewards/margins": 0.3037002384662628, + "rewards/rejected": -1.8168644905090332, + "step": 142 + }, + { + "epoch": 0.3023455566567559, + "grad_norm": 2.0075581073760986, + "learning_rate": 8.811637177920499e-07, + "logits/chosen": -0.4438302516937256, + "logits/rejected": -0.4916025698184967, + "logps/chosen": -0.800719141960144, + "logps/rejected": -0.8658267855644226, + "loss": 1.358, + "rewards/accuracies": 0.515625, + "rewards/chosen": -1.601438283920288, + "rewards/margins": 0.1302153617143631, + "rewards/rejected": -1.7316535711288452, + "step": 143 + }, + { + "epoch": 0.3044598612487612, + "grad_norm": 1.1243022680282593, + "learning_rate": 8.7875562108081e-07, + "logits/chosen": -0.40519949793815613, + "logits/rejected": -0.3905750811100006, + "logps/chosen": -0.689585268497467, + "logps/rejected": -0.7312421798706055, + "loss": 1.3503, + "rewards/accuracies": 0.5703125, + "rewards/chosen": -1.379170536994934, + "rewards/margins": 0.08331384509801865, + "rewards/rejected": -1.462484359741211, + "step": 144 + }, + { + "epoch": 0.3065741658407664, + "grad_norm": 0.7543137669563293, + "learning_rate": 8.76326730987568e-07, + "logits/chosen": -0.4696752727031708, + "logits/rejected": -0.4357326626777649, + "logps/chosen": -0.7813425660133362, + "logps/rejected": -0.8276973962783813, + "loss": 1.3794, + "rewards/accuracies": 0.5234375, + "rewards/chosen": -1.5626851320266724, + "rewards/margins": 0.09270970523357391, + "rewards/rejected": -1.6553947925567627, + "step": 145 + }, + { + "epoch": 0.3086884704327717, + "grad_norm": 1.3136053085327148, + "learning_rate": 8.738771808564555e-07, + "logits/chosen": -0.4262731075286865, + "logits/rejected": -0.44038820266723633, + "logps/chosen": -0.697494387626648, + "logps/rejected": -0.8369535803794861, + "loss": 1.2699, + "rewards/accuracies": 0.609375, + "rewards/chosen": -1.394988775253296, + "rewards/margins": 0.2789183557033539, + "rewards/rejected": -1.6739071607589722, + "step": 146 + }, + { + "epoch": 0.310802775024777, + "grad_norm": 2.221562385559082, + "learning_rate": 8.714071051658245e-07, + "logits/chosen": -0.40089336037635803, + "logits/rejected": -0.37991875410079956, + "logps/chosen": -0.7704445123672485, + "logps/rejected": -0.859091579914093, + "loss": 1.2987, + "rewards/accuracies": 0.5859375, + "rewards/chosen": -1.540889024734497, + "rewards/margins": 0.17729414999485016, + "rewards/rejected": -1.718183159828186, + "step": 147 + }, + { + "epoch": 0.3129170796167823, + "grad_norm": 1.5049912929534912, + "learning_rate": 8.689166395208636e-07, + "logits/chosen": -0.38984015583992004, + "logits/rejected": -0.35900723934173584, + "logps/chosen": -0.6424779891967773, + "logps/rejected": -0.7145389318466187, + "loss": 1.3261, + "rewards/accuracies": 0.5703125, + "rewards/chosen": -1.2849559783935547, + "rewards/margins": 0.14412200450897217, + "rewards/rejected": -1.4290778636932373, + "step": 148 + }, + { + "epoch": 0.31503138420878757, + "grad_norm": 0.36125388741493225, + "learning_rate": 8.664059206461534e-07, + "logits/chosen": -0.3490441143512726, + "logits/rejected": -0.3219914436340332, + "logps/chosen": -0.7200264930725098, + "logps/rejected": -0.7924249768257141, + "loss": 1.3476, + "rewards/accuracies": 0.5625, + "rewards/chosen": -1.4400529861450195, + "rewards/margins": 0.1447969526052475, + "rewards/rejected": -1.5848499536514282, + "step": 149 + }, + { + "epoch": 0.31714568880079286, + "grad_norm": 1.039840579032898, + "learning_rate": 8.638750863781612e-07, + "logits/chosen": -0.40701645612716675, + "logits/rejected": -0.406186580657959, + "logps/chosen": -0.7083575129508972, + "logps/rejected": -0.7766748070716858, + "loss": 1.3263, + "rewards/accuracies": 0.5390625, + "rewards/chosen": -1.4167150259017944, + "rewards/margins": 0.1366347074508667, + "rewards/rejected": -1.5533496141433716, + "step": 150 + }, + { + "epoch": 0.31925999339279815, + "grad_norm": 0.7128564119338989, + "learning_rate": 8.613242756576728e-07, + "logits/chosen": -0.40932926535606384, + "logits/rejected": -0.4234562814235687, + "logps/chosen": -0.6775843501091003, + "logps/rejected": -0.7866222858428955, + "loss": 1.2834, + "rewards/accuracies": 0.6015625, + "rewards/chosen": -1.3551687002182007, + "rewards/margins": 0.2180757373571396, + "rewards/rejected": -1.573244571685791, + "step": 151 + }, + { + "epoch": 0.32137429798480344, + "grad_norm": 1.1701059341430664, + "learning_rate": 8.587536285221655e-07, + "logits/chosen": -0.3654797077178955, + "logits/rejected": -0.3181680738925934, + "logps/chosen": -0.6686022877693176, + "logps/rejected": -0.7058504223823547, + "loss": 1.3612, + "rewards/accuracies": 0.546875, + "rewards/chosen": -1.3372045755386353, + "rewards/margins": 0.07449636608362198, + "rewards/rejected": -1.4117008447647095, + "step": 152 + }, + { + "epoch": 0.3234886025768087, + "grad_norm": 0.8239700794219971, + "learning_rate": 8.561632860981204e-07, + "logits/chosen": -0.42527130246162415, + "logits/rejected": -0.4091627299785614, + "logps/chosen": -0.6969794631004333, + "logps/rejected": -0.8019355535507202, + "loss": 1.2974, + "rewards/accuracies": 0.578125, + "rewards/chosen": -1.3939589262008667, + "rewards/margins": 0.20991206169128418, + "rewards/rejected": -1.6038711071014404, + "step": 153 + }, + { + "epoch": 0.325602907168814, + "grad_norm": 1.4885636568069458, + "learning_rate": 8.535533905932737e-07, + "logits/chosen": -0.4126192331314087, + "logits/rejected": -0.41548141837120056, + "logps/chosen": -0.7076549530029297, + "logps/rejected": -0.7940821051597595, + "loss": 1.3198, + "rewards/accuracies": 0.625, + "rewards/chosen": -1.4153099060058594, + "rewards/margins": 0.17285437881946564, + "rewards/rejected": -1.588164210319519, + "step": 154 + }, + { + "epoch": 0.3277172117608193, + "grad_norm": 1.439434289932251, + "learning_rate": 8.509240852888106e-07, + "logits/chosen": -0.3763914704322815, + "logits/rejected": -0.3617165684700012, + "logps/chosen": -0.7189474105834961, + "logps/rejected": -0.827629804611206, + "loss": 1.2816, + "rewards/accuracies": 0.625, + "rewards/chosen": -1.4378948211669922, + "rewards/margins": 0.2173648476600647, + "rewards/rejected": -1.655259609222412, + "step": 155 + }, + { + "epoch": 0.3298315163528246, + "grad_norm": 1.4505418539047241, + "learning_rate": 8.482755145314985e-07, + "logits/chosen": -0.37879478931427, + "logits/rejected": -0.38689684867858887, + "logps/chosen": -0.7011865973472595, + "logps/rejected": -0.8019431829452515, + "loss": 1.3158, + "rewards/accuracies": 0.5546875, + "rewards/chosen": -1.402373194694519, + "rewards/margins": 0.2015131413936615, + "rewards/rejected": -1.603886365890503, + "step": 156 + }, + { + "epoch": 0.3319458209448299, + "grad_norm": 2.0968713760375977, + "learning_rate": 8.45607823725763e-07, + "logits/chosen": -0.4366365075111389, + "logits/rejected": -0.41210681200027466, + "logps/chosen": -0.6455651521682739, + "logps/rejected": -0.7228428721427917, + "loss": 1.3247, + "rewards/accuracies": 0.5546875, + "rewards/chosen": -1.2911303043365479, + "rewards/margins": 0.1545555144548416, + "rewards/rejected": -1.4456857442855835, + "step": 157 + }, + { + "epoch": 0.3340601255368352, + "grad_norm": 0.6716106534004211, + "learning_rate": 8.429211593257052e-07, + "logits/chosen": -0.42992207407951355, + "logits/rejected": -0.4105672836303711, + "logps/chosen": -0.6981461048126221, + "logps/rejected": -0.7909567952156067, + "loss": 1.3128, + "rewards/accuracies": 0.5859375, + "rewards/chosen": -1.3962922096252441, + "rewards/margins": 0.1856214702129364, + "rewards/rejected": -1.5819135904312134, + "step": 158 + }, + { + "epoch": 0.33617443012884046, + "grad_norm": 2.4430501461029053, + "learning_rate": 8.402156688270612e-07, + "logits/chosen": -0.4184916317462921, + "logits/rejected": -0.3943992257118225, + "logps/chosen": -0.6568948030471802, + "logps/rejected": -0.7506390810012817, + "loss": 1.2992, + "rewards/accuracies": 0.5859375, + "rewards/chosen": -1.3137896060943604, + "rewards/margins": 0.18748846650123596, + "rewards/rejected": -1.5012781620025635, + "step": 159 + }, + { + "epoch": 0.3382887347208457, + "grad_norm": 2.0322091579437256, + "learning_rate": 8.374915007591052e-07, + "logits/chosen": -0.4713057577610016, + "logits/rejected": -0.42163771390914917, + "logps/chosen": -0.7347853779792786, + "logps/rejected": -0.7770044207572937, + "loss": 1.3801, + "rewards/accuracies": 0.5078125, + "rewards/chosen": -1.4695707559585571, + "rewards/margins": 0.0844380110502243, + "rewards/rejected": -1.5540088415145874, + "step": 160 + }, + { + "epoch": 0.340403039312851, + "grad_norm": 0.4045500159263611, + "learning_rate": 8.347488046764948e-07, + "logits/chosen": -0.39465126395225525, + "logits/rejected": -0.3961923122406006, + "logps/chosen": -0.601732075214386, + "logps/rejected": -0.694148600101471, + "loss": 1.2859, + "rewards/accuracies": 0.6328125, + "rewards/chosen": -1.203464150428772, + "rewards/margins": 0.18483319878578186, + "rewards/rejected": -1.388297200202942, + "step": 161 + }, + { + "epoch": 0.3425173439048563, + "grad_norm": 2.79396915435791, + "learning_rate": 8.319877311510612e-07, + "logits/chosen": -0.4311378002166748, + "logits/rejected": -0.4248836636543274, + "logps/chosen": -0.6813413500785828, + "logps/rejected": -0.775830864906311, + "loss": 1.3001, + "rewards/accuracies": 0.578125, + "rewards/chosen": -1.3626827001571655, + "rewards/margins": 0.18897925317287445, + "rewards/rejected": -1.551661729812622, + "step": 162 + }, + { + "epoch": 0.34463164849686156, + "grad_norm": 0.714146077632904, + "learning_rate": 8.292084317635419e-07, + "logits/chosen": -0.4060715436935425, + "logits/rejected": -0.3770482540130615, + "logps/chosen": -0.7176523208618164, + "logps/rejected": -0.7973593473434448, + "loss": 1.324, + "rewards/accuracies": 0.5859375, + "rewards/chosen": -1.4353046417236328, + "rewards/margins": 0.15941408276557922, + "rewards/rejected": -1.5947186946868896, + "step": 163 + }, + { + "epoch": 0.34674595308886685, + "grad_norm": 1.6007037162780762, + "learning_rate": 8.264110590952607e-07, + "logits/chosen": -0.49063974618911743, + "logits/rejected": -0.5119628310203552, + "logps/chosen": -0.7263911366462708, + "logps/rejected": -0.9138184785842896, + "loss": 1.2439, + "rewards/accuracies": 0.6484375, + "rewards/chosen": -1.4527822732925415, + "rewards/margins": 0.3748546540737152, + "rewards/rejected": -1.827636957168579, + "step": 164 + }, + { + "epoch": 0.34886025768087214, + "grad_norm": 1.4566830396652222, + "learning_rate": 8.235957667197494e-07, + "logits/chosen": -0.4681779146194458, + "logits/rejected": -0.46475380659103394, + "logps/chosen": -0.6923782229423523, + "logps/rejected": -0.7901281118392944, + "loss": 1.295, + "rewards/accuracies": 0.5703125, + "rewards/chosen": -1.3847564458847046, + "rewards/margins": 0.19549959897994995, + "rewards/rejected": -1.5802562236785889, + "step": 165 + }, + { + "epoch": 0.35097456227287743, + "grad_norm": 3.0825328826904297, + "learning_rate": 8.207627091943177e-07, + "logits/chosen": -0.4294862151145935, + "logits/rejected": -0.42411237955093384, + "logps/chosen": -0.6851246356964111, + "logps/rejected": -0.7844961881637573, + "loss": 1.2871, + "rewards/accuracies": 0.625, + "rewards/chosen": -1.3702492713928223, + "rewards/margins": 0.19874317944049835, + "rewards/rejected": -1.5689923763275146, + "step": 166 + }, + { + "epoch": 0.3530888668648827, + "grad_norm": 1.0783339738845825, + "learning_rate": 8.179120420515675e-07, + "logits/chosen": -0.4528030455112457, + "logits/rejected": -0.4626815617084503, + "logps/chosen": -0.703376293182373, + "logps/rejected": -0.8752757906913757, + "loss": 1.2193, + "rewards/accuracies": 0.703125, + "rewards/chosen": -1.406752586364746, + "rewards/margins": 0.34379899501800537, + "rewards/rejected": -1.7505515813827515, + "step": 167 + }, + { + "epoch": 0.355203171456888, + "grad_norm": 2.6788036823272705, + "learning_rate": 8.150439217908556e-07, + "logits/chosen": -0.44946759939193726, + "logits/rejected": -0.47430264949798584, + "logps/chosen": -0.751136839389801, + "logps/rejected": -0.874577522277832, + "loss": 1.29, + "rewards/accuracies": 0.578125, + "rewards/chosen": -1.502273678779602, + "rewards/margins": 0.24688144028186798, + "rewards/rejected": -1.749155044555664, + "step": 168 + }, + { + "epoch": 0.3573174760488933, + "grad_norm": 0.9087730050086975, + "learning_rate": 8.121585058696999e-07, + "logits/chosen": -0.47294262051582336, + "logits/rejected": -0.46765226125717163, + "logps/chosen": -0.7291173934936523, + "logps/rejected": -0.7999277114868164, + "loss": 1.3482, + "rewards/accuracies": 0.6015625, + "rewards/chosen": -1.4582347869873047, + "rewards/margins": 0.1416206806898117, + "rewards/rejected": -1.5998554229736328, + "step": 169 + }, + { + "epoch": 0.3594317806408986, + "grad_norm": 3.392674207687378, + "learning_rate": 8.092559526951374e-07, + "logits/chosen": -0.5026620626449585, + "logits/rejected": -0.46620574593544006, + "logps/chosen": -0.746992290019989, + "logps/rejected": -0.8266301155090332, + "loss": 1.3202, + "rewards/accuracies": 0.6328125, + "rewards/chosen": -1.493984580039978, + "rewards/margins": 0.15927578508853912, + "rewards/rejected": -1.6532602310180664, + "step": 170 + }, + { + "epoch": 0.3615460852329039, + "grad_norm": 1.27628755569458, + "learning_rate": 8.063364216150256e-07, + "logits/chosen": -0.5211395025253296, + "logits/rejected": -0.5419963598251343, + "logps/chosen": -0.7919114828109741, + "logps/rejected": -0.8731362223625183, + "loss": 1.3228, + "rewards/accuracies": 0.5703125, + "rewards/chosen": -1.5838229656219482, + "rewards/margins": 0.16244953870773315, + "rewards/rejected": -1.7462724447250366, + "step": 171 + }, + { + "epoch": 0.36366038982490917, + "grad_norm": 0.8269656896591187, + "learning_rate": 8.034000729092967e-07, + "logits/chosen": -0.49545183777809143, + "logits/rejected": -0.4716613292694092, + "logps/chosen": -0.719520092010498, + "logps/rejected": -0.7876347303390503, + "loss": 1.3367, + "rewards/accuracies": 0.5625, + "rewards/chosen": -1.439040184020996, + "rewards/margins": 0.13622930645942688, + "rewards/rejected": -1.5752694606781006, + "step": 172 + }, + { + "epoch": 0.36577469441691446, + "grad_norm": 0.6049383282661438, + "learning_rate": 8.004470677811559e-07, + "logits/chosen": -0.45276379585266113, + "logits/rejected": -0.42617955803871155, + "logps/chosen": -0.7097947597503662, + "logps/rejected": -0.7606989145278931, + "loss": 1.3909, + "rewards/accuracies": 0.5703125, + "rewards/chosen": -1.4195895195007324, + "rewards/margins": 0.10180822014808655, + "rewards/rejected": -1.5213978290557861, + "step": 173 + }, + { + "epoch": 0.36788899900891975, + "grad_norm": 3.980013847351074, + "learning_rate": 7.974775683482337e-07, + "logits/chosen": -0.4783569574356079, + "logits/rejected": -0.43521156907081604, + "logps/chosen": -0.7623491287231445, + "logps/rejected": -0.8719285130500793, + "loss": 1.2838, + "rewards/accuracies": 0.5859375, + "rewards/chosen": -1.524698257446289, + "rewards/margins": 0.2191585898399353, + "rewards/rejected": -1.7438570261001587, + "step": 174 + }, + { + "epoch": 0.370003303600925, + "grad_norm": 1.024530053138733, + "learning_rate": 7.94491737633684e-07, + "logits/chosen": -0.5009916424751282, + "logits/rejected": -0.48874592781066895, + "logps/chosen": -0.7552992701530457, + "logps/rejected": -0.8485872745513916, + "loss": 1.3153, + "rewards/accuracies": 0.6015625, + "rewards/chosen": -1.5105985403060913, + "rewards/margins": 0.18657605350017548, + "rewards/rejected": -1.6971745491027832, + "step": 175 + }, + { + "epoch": 0.37211760819293027, + "grad_norm": 1.5952919721603394, + "learning_rate": 7.91489739557236e-07, + "logits/chosen": -0.4424138069152832, + "logits/rejected": -0.4334307312965393, + "logps/chosen": -0.6956002116203308, + "logps/rejected": -0.8018803000450134, + "loss": 1.3011, + "rewards/accuracies": 0.59375, + "rewards/chosen": -1.3912004232406616, + "rewards/margins": 0.21256020665168762, + "rewards/rejected": -1.6037606000900269, + "step": 176 + }, + { + "epoch": 0.37423191278493556, + "grad_norm": 1.8331164121627808, + "learning_rate": 7.884717389261934e-07, + "logits/chosen": -0.4836267828941345, + "logits/rejected": -0.5018677115440369, + "logps/chosen": -0.7895969152450562, + "logps/rejected": -0.927432656288147, + "loss": 1.2467, + "rewards/accuracies": 0.625, + "rewards/chosen": -1.5791938304901123, + "rewards/margins": 0.27567166090011597, + "rewards/rejected": -1.854865312576294, + "step": 177 + }, + { + "epoch": 0.37634621737694085, + "grad_norm": 2.165984869003296, + "learning_rate": 7.854379014263876e-07, + "logits/chosen": -0.46125832200050354, + "logits/rejected": -0.39802712202072144, + "logps/chosen": -0.8382925391197205, + "logps/rejected": -0.9422982931137085, + "loss": 1.339, + "rewards/accuracies": 0.5546875, + "rewards/chosen": -1.676585078239441, + "rewards/margins": 0.20801125466823578, + "rewards/rejected": -1.884596586227417, + "step": 178 + }, + { + "epoch": 0.37846052196894614, + "grad_norm": 0.522197425365448, + "learning_rate": 7.823883936130817e-07, + "logits/chosen": -0.4747823476791382, + "logits/rejected": -0.4888593554496765, + "logps/chosen": -0.723059892654419, + "logps/rejected": -0.84626305103302, + "loss": 1.2708, + "rewards/accuracies": 0.6171875, + "rewards/chosen": -1.446119785308838, + "rewards/margins": 0.24640652537345886, + "rewards/rejected": -1.69252610206604, + "step": 179 + }, + { + "epoch": 0.38057482656095143, + "grad_norm": 1.9690748453140259, + "learning_rate": 7.793233829018262e-07, + "logits/chosen": -0.5430271625518799, + "logits/rejected": -0.5403288006782532, + "logps/chosen": -0.8244275450706482, + "logps/rejected": -0.9133931994438171, + "loss": 1.3306, + "rewards/accuracies": 0.5, + "rewards/chosen": -1.6488550901412964, + "rewards/margins": 0.17793115973472595, + "rewards/rejected": -1.8267863988876343, + "step": 180 + }, + { + "epoch": 0.3826891311529567, + "grad_norm": 2.9181363582611084, + "learning_rate": 7.762430375592688e-07, + "logits/chosen": -0.4843495786190033, + "logits/rejected": -0.47929176688194275, + "logps/chosen": -0.8097372055053711, + "logps/rejected": -0.8973760008811951, + "loss": 1.3283, + "rewards/accuracies": 0.5625, + "rewards/chosen": -1.6194744110107422, + "rewards/margins": 0.17527759075164795, + "rewards/rejected": -1.7947520017623901, + "step": 181 + }, + { + "epoch": 0.384803435744962, + "grad_norm": 4.227083683013916, + "learning_rate": 7.731475266939158e-07, + "logits/chosen": -0.5047686696052551, + "logits/rejected": -0.4921850264072418, + "logps/chosen": -0.875984787940979, + "logps/rejected": -1.0406755208969116, + "loss": 1.3169, + "rewards/accuracies": 0.5703125, + "rewards/chosen": -1.751969575881958, + "rewards/margins": 0.32938146591186523, + "rewards/rejected": -2.0813510417938232, + "step": 182 + }, + { + "epoch": 0.3869177403369673, + "grad_norm": 1.2871490716934204, + "learning_rate": 7.700370202468489e-07, + "logits/chosen": -0.5123783349990845, + "logits/rejected": -0.55179762840271, + "logps/chosen": -0.8869211077690125, + "logps/rejected": -1.1082773208618164, + "loss": 1.216, + "rewards/accuracies": 0.640625, + "rewards/chosen": -1.773842215538025, + "rewards/margins": 0.4427123963832855, + "rewards/rejected": -2.216554641723633, + "step": 183 + }, + { + "epoch": 0.3890320449289726, + "grad_norm": 1.3015679121017456, + "learning_rate": 7.669116889823954e-07, + "logits/chosen": -0.49182361364364624, + "logits/rejected": -0.5180585384368896, + "logps/chosen": -0.8816227912902832, + "logps/rejected": -0.9516821503639221, + "loss": 1.3449, + "rewards/accuracies": 0.5078125, + "rewards/chosen": -1.7632455825805664, + "rewards/margins": 0.14011862874031067, + "rewards/rejected": -1.9033643007278442, + "step": 184 + }, + { + "epoch": 0.3911463495209779, + "grad_norm": 4.280956268310547, + "learning_rate": 7.637717044787526e-07, + "logits/chosen": -0.5702117681503296, + "logits/rejected": -0.5475804209709167, + "logps/chosen": -0.9307697415351868, + "logps/rejected": -1.0322346687316895, + "loss": 1.3434, + "rewards/accuracies": 0.5546875, + "rewards/chosen": -1.8615394830703735, + "rewards/margins": 0.20292985439300537, + "rewards/rejected": -2.064469337463379, + "step": 185 + }, + { + "epoch": 0.39326065411298317, + "grad_norm": 1.3511455059051514, + "learning_rate": 7.606172391185699e-07, + "logits/chosen": -0.5466108322143555, + "logits/rejected": -0.551085352897644, + "logps/chosen": -1.0657893419265747, + "logps/rejected": -1.15786612033844, + "loss": 1.3549, + "rewards/accuracies": 0.4609375, + "rewards/chosen": -2.1315786838531494, + "rewards/margins": 0.18415334820747375, + "rewards/rejected": -2.31573224067688, + "step": 186 + }, + { + "epoch": 0.39537495870498846, + "grad_norm": 0.7001176476478577, + "learning_rate": 7.574484660794836e-07, + "logits/chosen": -0.4849010407924652, + "logits/rejected": -0.5057946443557739, + "logps/chosen": -1.0784757137298584, + "logps/rejected": -1.2035218477249146, + "loss": 1.3556, + "rewards/accuracies": 0.4921875, + "rewards/chosen": -2.156951427459717, + "rewards/margins": 0.25009211897850037, + "rewards/rejected": -2.407043695449829, + "step": 187 + }, + { + "epoch": 0.39748926329699374, + "grad_norm": 3.1405649185180664, + "learning_rate": 7.542655593246103e-07, + "logits/chosen": -0.5316596031188965, + "logits/rejected": -0.5658366680145264, + "logps/chosen": -1.0630009174346924, + "logps/rejected": -1.2867177724838257, + "loss": 1.2612, + "rewards/accuracies": 0.5703125, + "rewards/chosen": -2.1260018348693848, + "rewards/margins": 0.447433739900589, + "rewards/rejected": -2.5734355449676514, + "step": 188 + }, + { + "epoch": 0.39960356788899903, + "grad_norm": 2.142986297607422, + "learning_rate": 7.510686935929962e-07, + "logits/chosen": -0.5959028005599976, + "logits/rejected": -0.5836039781570435, + "logps/chosen": -1.111003041267395, + "logps/rejected": -1.1858208179473877, + "loss": 1.3958, + "rewards/accuracies": 0.5390625, + "rewards/chosen": -2.22200608253479, + "rewards/margins": 0.149635449051857, + "rewards/rejected": -2.3716416358947754, + "step": 189 + }, + { + "epoch": 0.40171787248100427, + "grad_norm": 1.9227335453033447, + "learning_rate": 7.478580443900246e-07, + "logits/chosen": -0.607532799243927, + "logits/rejected": -0.6102017760276794, + "logps/chosen": -1.3353261947631836, + "logps/rejected": -1.3975369930267334, + "loss": 1.457, + "rewards/accuracies": 0.5078125, + "rewards/chosen": -2.670652389526367, + "rewards/margins": 0.12442154437303543, + "rewards/rejected": -2.795073986053467, + "step": 190 + }, + { + "epoch": 0.40383217707300956, + "grad_norm": 0.8509105443954468, + "learning_rate": 7.446337879777802e-07, + "logits/chosen": -0.5903070569038391, + "logits/rejected": -0.5728173851966858, + "logps/chosen": -1.27094566822052, + "logps/rejected": -1.3024815320968628, + "loss": 1.4953, + "rewards/accuracies": 0.5, + "rewards/chosen": -2.54189133644104, + "rewards/margins": 0.06307169049978256, + "rewards/rejected": -2.6049630641937256, + "step": 191 + }, + { + "epoch": 0.40594648166501485, + "grad_norm": 1.1561088562011719, + "learning_rate": 7.413961013653725e-07, + "logits/chosen": -0.5578102469444275, + "logits/rejected": -0.5907329320907593, + "logps/chosen": -1.3817013502120972, + "logps/rejected": -1.419295072555542, + "loss": 1.4865, + "rewards/accuracies": 0.515625, + "rewards/chosen": -2.7634027004241943, + "rewards/margins": 0.07518734782934189, + "rewards/rejected": -2.838590145111084, + "step": 192 + }, + { + "epoch": 0.40806078625702014, + "grad_norm": 8.165387153625488, + "learning_rate": 7.381451622992183e-07, + "logits/chosen": -0.5213198661804199, + "logits/rejected": -0.5392848253250122, + "logps/chosen": -1.1798306703567505, + "logps/rejected": -1.2692899703979492, + "loss": 1.3971, + "rewards/accuracies": 0.53125, + "rewards/chosen": -2.359661340713501, + "rewards/margins": 0.17891867458820343, + "rewards/rejected": -2.5385799407958984, + "step": 193 + }, + { + "epoch": 0.4101750908490254, + "grad_norm": 1.2850884199142456, + "learning_rate": 7.348811492532839e-07, + "logits/chosen": -0.5382787585258484, + "logits/rejected": -0.5274642705917358, + "logps/chosen": -1.242587685585022, + "logps/rejected": -1.272438645362854, + "loss": 1.4795, + "rewards/accuracies": 0.4921875, + "rewards/chosen": -2.485175371170044, + "rewards/margins": 0.05970197170972824, + "rewards/rejected": -2.544877290725708, + "step": 194 + }, + { + "epoch": 0.4122893954410307, + "grad_norm": 4.910929203033447, + "learning_rate": 7.316042414192864e-07, + "logits/chosen": -0.6186666488647461, + "logits/rejected": -0.6255884170532227, + "logps/chosen": -1.1743704080581665, + "logps/rejected": -1.2720146179199219, + "loss": 1.4127, + "rewards/accuracies": 0.546875, + "rewards/chosen": -2.348740816116333, + "rewards/margins": 0.19528816640377045, + "rewards/rejected": -2.5440292358398438, + "step": 195 + }, + { + "epoch": 0.414403700033036, + "grad_norm": 4.270901203155518, + "learning_rate": 7.283146186968565e-07, + "logits/chosen": -0.5861366987228394, + "logits/rejected": -0.6005197763442993, + "logps/chosen": -1.2127022743225098, + "logps/rejected": -1.3036490678787231, + "loss": 1.4067, + "rewards/accuracies": 0.546875, + "rewards/chosen": -2.4254045486450195, + "rewards/margins": 0.18189355731010437, + "rewards/rejected": -2.6072981357574463, + "step": 196 + }, + { + "epoch": 0.4165180046250413, + "grad_norm": 0.3070116639137268, + "learning_rate": 7.250124616836622e-07, + "logits/chosen": -0.6026022434234619, + "logits/rejected": -0.5920048952102661, + "logps/chosen": -1.0706496238708496, + "logps/rejected": -1.2879594564437866, + "loss": 1.2465, + "rewards/accuracies": 0.6484375, + "rewards/chosen": -2.141299247741699, + "rewards/margins": 0.4346192479133606, + "rewards/rejected": -2.5759189128875732, + "step": 197 + }, + { + "epoch": 0.4186323092170466, + "grad_norm": 1.160252571105957, + "learning_rate": 7.216979516654943e-07, + "logits/chosen": -0.5808722376823425, + "logits/rejected": -0.5770124197006226, + "logps/chosen": -1.0426011085510254, + "logps/rejected": -1.1295092105865479, + "loss": 1.4244, + "rewards/accuracies": 0.53125, + "rewards/chosen": -2.085202217102051, + "rewards/margins": 0.1738162338733673, + "rewards/rejected": -2.2590184211730957, + "step": 198 + }, + { + "epoch": 0.4207466138090519, + "grad_norm": 4.6966471672058105, + "learning_rate": 7.183712706063132e-07, + "logits/chosen": -0.5958350896835327, + "logits/rejected": -0.6440161466598511, + "logps/chosen": -0.981076717376709, + "logps/rejected": -1.1257147789001465, + "loss": 1.3175, + "rewards/accuracies": 0.6015625, + "rewards/chosen": -1.962153434753418, + "rewards/margins": 0.28927627205848694, + "rewards/rejected": -2.251429557800293, + "step": 199 + }, + { + "epoch": 0.42286091840105716, + "grad_norm": 2.9395248889923096, + "learning_rate": 7.150326011382603e-07, + "logits/chosen": -0.5647889375686646, + "logits/rejected": -0.5762943625450134, + "logps/chosen": -0.8101261854171753, + "logps/rejected": -1.0001438856124878, + "loss": 1.2135, + "rewards/accuracies": 0.6875, + "rewards/chosen": -1.6202523708343506, + "rewards/margins": 0.38003528118133545, + "rewards/rejected": -2.0002877712249756, + "step": 200 + }, + { + "epoch": 0.42497522299306245, + "grad_norm": 1.2575147151947021, + "learning_rate": 7.116821265516306e-07, + "logits/chosen": -0.5834293961524963, + "logits/rejected": -0.5929508805274963, + "logps/chosen": -0.8768399953842163, + "logps/rejected": -1.0942046642303467, + "loss": 1.219, + "rewards/accuracies": 0.6171875, + "rewards/chosen": -1.7536799907684326, + "rewards/margins": 0.43472927808761597, + "rewards/rejected": -2.1884093284606934, + "step": 201 + }, + { + "epoch": 0.42708952758506774, + "grad_norm": 1.4035751819610596, + "learning_rate": 7.083200307848115e-07, + "logits/chosen": -0.5424078106880188, + "logits/rejected": -0.5316082239151001, + "logps/chosen": -0.8791903257369995, + "logps/rejected": -0.9323580265045166, + "loss": 1.3675, + "rewards/accuracies": 0.546875, + "rewards/chosen": -1.758380651473999, + "rewards/margins": 0.10633517056703568, + "rewards/rejected": -1.8647160530090332, + "step": 202 + }, + { + "epoch": 0.42920383217707303, + "grad_norm": 1.8622503280639648, + "learning_rate": 7.049464984141829e-07, + "logits/chosen": -0.5329294204711914, + "logits/rejected": -0.5523126721382141, + "logps/chosen": -0.695776104927063, + "logps/rejected": -0.8400713801383972, + "loss": 1.2285, + "rewards/accuracies": 0.6328125, + "rewards/chosen": -1.391552209854126, + "rewards/margins": 0.28859058022499084, + "rewards/rejected": -1.6801427602767944, + "step": 203 + }, + { + "epoch": 0.4313181367690783, + "grad_norm": 0.8603182435035706, + "learning_rate": 7.015617146439861e-07, + "logits/chosen": -0.4516752064228058, + "logits/rejected": -0.46907976269721985, + "logps/chosen": -0.6868133544921875, + "logps/rejected": -0.8646677732467651, + "loss": 1.2417, + "rewards/accuracies": 0.65625, + "rewards/chosen": -1.373626708984375, + "rewards/margins": 0.355709046125412, + "rewards/rejected": -1.7293355464935303, + "step": 204 + }, + { + "epoch": 0.43343244136108355, + "grad_norm": 0.6437748670578003, + "learning_rate": 6.981658652961546e-07, + "logits/chosen": -0.6159051656723022, + "logits/rejected": -0.6000130772590637, + "logps/chosen": -0.7715178728103638, + "logps/rejected": -0.8714219331741333, + "loss": 1.3469, + "rewards/accuracies": 0.5703125, + "rewards/chosen": -1.5430357456207275, + "rewards/margins": 0.19980813562870026, + "rewards/rejected": -1.7428438663482666, + "step": 205 + }, + { + "epoch": 0.43554674595308884, + "grad_norm": 1.2309322357177734, + "learning_rate": 6.947591368001137e-07, + "logits/chosen": -0.5913614630699158, + "logits/rejected": -0.6128537654876709, + "logps/chosen": -0.7512561678886414, + "logps/rejected": -0.8872793912887573, + "loss": 1.26, + "rewards/accuracies": 0.578125, + "rewards/chosen": -1.5025123357772827, + "rewards/margins": 0.2720465660095215, + "rewards/rejected": -1.7745587825775146, + "step": 206 + }, + { + "epoch": 0.43766105054509413, + "grad_norm": 0.6153685450553894, + "learning_rate": 6.913417161825449e-07, + "logits/chosen": -0.5976595878601074, + "logits/rejected": -0.6222202181816101, + "logps/chosen": -0.837669849395752, + "logps/rejected": -0.9835771918296814, + "loss": 1.2986, + "rewards/accuracies": 0.5859375, + "rewards/chosen": -1.675339698791504, + "rewards/margins": 0.2918146252632141, + "rewards/rejected": -1.9671543836593628, + "step": 207 + }, + { + "epoch": 0.4397753551370994, + "grad_norm": 1.9922760725021362, + "learning_rate": 6.87913791057119e-07, + "logits/chosen": -0.6808818578720093, + "logits/rejected": -0.6692708730697632, + "logps/chosen": -0.7088961601257324, + "logps/rejected": -0.8256410360336304, + "loss": 1.281, + "rewards/accuracies": 0.6015625, + "rewards/chosen": -1.4177923202514648, + "rewards/margins": 0.23348984122276306, + "rewards/rejected": -1.6512820720672607, + "step": 208 + }, + { + "epoch": 0.4418896597291047, + "grad_norm": 1.9562067985534668, + "learning_rate": 6.844755496141961e-07, + "logits/chosen": -0.5282632112503052, + "logits/rejected": -0.5692226886749268, + "logps/chosen": -0.7235382795333862, + "logps/rejected": -0.801092803478241, + "loss": 1.3227, + "rewards/accuracies": 0.53125, + "rewards/chosen": -1.4470765590667725, + "rewards/margins": 0.1551089584827423, + "rewards/rejected": -1.602185606956482, + "step": 209 + }, + { + "epoch": 0.44400396432111, + "grad_norm": 0.8182584047317505, + "learning_rate": 6.81027180610493e-07, + "logits/chosen": -0.6418904662132263, + "logits/rejected": -0.5941328406333923, + "logps/chosen": -0.820648729801178, + "logps/rejected": -0.8864803910255432, + "loss": 1.3498, + "rewards/accuracies": 0.625, + "rewards/chosen": -1.641297459602356, + "rewards/margins": 0.13166317343711853, + "rewards/rejected": -1.7729607820510864, + "step": 210 + }, + { + "epoch": 0.4461182689131153, + "grad_norm": 3.075260877609253, + "learning_rate": 6.775688733587227e-07, + "logits/chosen": -0.5926809906959534, + "logits/rejected": -0.5844541788101196, + "logps/chosen": -0.7822425365447998, + "logps/rejected": -0.8866626024246216, + "loss": 1.2884, + "rewards/accuracies": 0.5859375, + "rewards/chosen": -1.5644850730895996, + "rewards/margins": 0.20884013175964355, + "rewards/rejected": -1.7733252048492432, + "step": 211 + }, + { + "epoch": 0.4482325735051206, + "grad_norm": 0.8032744526863098, + "learning_rate": 6.741008177171993e-07, + "logits/chosen": -0.579971432685852, + "logits/rejected": -0.5978566408157349, + "logps/chosen": -0.721234917640686, + "logps/rejected": -0.8368514180183411, + "loss": 1.2781, + "rewards/accuracies": 0.6171875, + "rewards/chosen": -1.442469835281372, + "rewards/margins": 0.23123310506343842, + "rewards/rejected": -1.6737028360366821, + "step": 212 + }, + { + "epoch": 0.45034687809712587, + "grad_norm": 0.6680911779403687, + "learning_rate": 6.706232040794161e-07, + "logits/chosen": -0.6748596429824829, + "logits/rejected": -0.6615546941757202, + "logps/chosen": -0.7931480407714844, + "logps/rejected": -0.8879257440567017, + "loss": 1.337, + "rewards/accuracies": 0.546875, + "rewards/chosen": -1.5862960815429688, + "rewards/margins": 0.1895553171634674, + "rewards/rejected": -1.7758514881134033, + "step": 213 + }, + { + "epoch": 0.45246118268913116, + "grad_norm": 2.5107688903808594, + "learning_rate": 6.671362233635925e-07, + "logits/chosen": -0.6460363268852234, + "logits/rejected": -0.6273557543754578, + "logps/chosen": -0.823783814907074, + "logps/rejected": -0.87412428855896, + "loss": 1.3756, + "rewards/accuracies": 0.53125, + "rewards/chosen": -1.647567629814148, + "rewards/margins": 0.10068092495203018, + "rewards/rejected": -1.74824857711792, + "step": 214 + }, + { + "epoch": 0.45457548728113645, + "grad_norm": 2.2206740379333496, + "learning_rate": 6.636400670021933e-07, + "logits/chosen": -0.6295229196548462, + "logits/rejected": -0.6330893039703369, + "logps/chosen": -0.807812511920929, + "logps/rejected": -0.9784457683563232, + "loss": 1.2259, + "rewards/accuracies": 0.6484375, + "rewards/chosen": -1.615625023841858, + "rewards/margins": 0.3412665128707886, + "rewards/rejected": -1.9568915367126465, + "step": 215 + }, + { + "epoch": 0.45668979187314174, + "grad_norm": 1.2925803661346436, + "learning_rate": 6.601349269314187e-07, + "logits/chosen": -0.6001027822494507, + "logits/rejected": -0.6305864453315735, + "logps/chosen": -0.7216315865516663, + "logps/rejected": -0.8616191744804382, + "loss": 1.269, + "rewards/accuracies": 0.65625, + "rewards/chosen": -1.4432631731033325, + "rewards/margins": 0.2799749970436096, + "rewards/rejected": -1.7232383489608765, + "step": 216 + }, + { + "epoch": 0.458804096465147, + "grad_norm": 4.863992214202881, + "learning_rate": 6.566209955806679e-07, + "logits/chosen": -0.5307935476303101, + "logits/rejected": -0.5385264754295349, + "logps/chosen": -0.8053566813468933, + "logps/rejected": -0.9241464734077454, + "loss": 1.3325, + "rewards/accuracies": 0.546875, + "rewards/chosen": -1.6107133626937866, + "rewards/margins": 0.23757943511009216, + "rewards/rejected": -1.8482929468154907, + "step": 217 + }, + { + "epoch": 0.4609184010571523, + "grad_norm": 1.0189604759216309, + "learning_rate": 6.530984658619733e-07, + "logits/chosen": -0.7031885385513306, + "logits/rejected": -0.7072005867958069, + "logps/chosen": -0.8382629752159119, + "logps/rejected": -0.9468755722045898, + "loss": 1.3276, + "rewards/accuracies": 0.5546875, + "rewards/chosen": -1.6765259504318237, + "rewards/margins": 0.21722503006458282, + "rewards/rejected": -1.8937511444091797, + "step": 218 + }, + { + "epoch": 0.4630327056491576, + "grad_norm": 1.1178699731826782, + "learning_rate": 6.495675311594122e-07, + "logits/chosen": -0.5736142992973328, + "logits/rejected": -0.5926069021224976, + "logps/chosen": -0.7676032781600952, + "logps/rejected": -0.9179919958114624, + "loss": 1.278, + "rewards/accuracies": 0.6015625, + "rewards/chosen": -1.5352065563201904, + "rewards/margins": 0.3007773756980896, + "rewards/rejected": -1.8359839916229248, + "step": 219 + }, + { + "epoch": 0.4651470102411629, + "grad_norm": 2.4985287189483643, + "learning_rate": 6.460283853184879e-07, + "logits/chosen": -0.6372602581977844, + "logits/rejected": -0.6313104033470154, + "logps/chosen": -0.8754556179046631, + "logps/rejected": -0.9803894758224487, + "loss": 1.3166, + "rewards/accuracies": 0.6328125, + "rewards/chosen": -1.7509112358093262, + "rewards/margins": 0.2098677009344101, + "rewards/rejected": -1.9607789516448975, + "step": 220 + }, + { + "epoch": 0.46726131483316813, + "grad_norm": 1.5675435066223145, + "learning_rate": 6.424812226354889e-07, + "logits/chosen": -0.6377983093261719, + "logits/rejected": -0.6666730642318726, + "logps/chosen": -0.7556843757629395, + "logps/rejected": -0.9096466302871704, + "loss": 1.2397, + "rewards/accuracies": 0.6796875, + "rewards/chosen": -1.511368751525879, + "rewards/margins": 0.30792441964149475, + "rewards/rejected": -1.8192932605743408, + "step": 221 + }, + { + "epoch": 0.4693756194251734, + "grad_norm": 2.853426218032837, + "learning_rate": 6.389262378468219e-07, + "logits/chosen": -0.6055567860603333, + "logits/rejected": -0.612144947052002, + "logps/chosen": -0.8588352203369141, + "logps/rejected": -0.8928595185279846, + "loss": 1.4022, + "rewards/accuracies": 0.5078125, + "rewards/chosen": -1.7176704406738281, + "rewards/margins": 0.06804870069026947, + "rewards/rejected": -1.7857190370559692, + "step": 222 + }, + { + "epoch": 0.4714899240171787, + "grad_norm": 0.528042733669281, + "learning_rate": 6.353636261183213e-07, + "logits/chosen": -0.6543641090393066, + "logits/rejected": -0.6635830402374268, + "logps/chosen": -0.7858147621154785, + "logps/rejected": -0.9400445222854614, + "loss": 1.2446, + "rewards/accuracies": 0.640625, + "rewards/chosen": -1.571629524230957, + "rewards/margins": 0.3084595203399658, + "rewards/rejected": -1.8800890445709229, + "step": 223 + }, + { + "epoch": 0.473604228609184, + "grad_norm": 1.1155768632888794, + "learning_rate": 6.317935830345338e-07, + "logits/chosen": -0.5700349807739258, + "logits/rejected": -0.6560614705085754, + "logps/chosen": -0.8426170945167542, + "logps/rejected": -0.9983471035957336, + "loss": 1.3204, + "rewards/accuracies": 0.578125, + "rewards/chosen": -1.6852341890335083, + "rewards/margins": 0.3114599883556366, + "rewards/rejected": -1.9966942071914673, + "step": 224 + }, + { + "epoch": 0.4757185332011893, + "grad_norm": 0.802669107913971, + "learning_rate": 6.282163045879823e-07, + "logits/chosen": -0.6912901401519775, + "logits/rejected": -0.7201069593429565, + "logps/chosen": -0.8135342597961426, + "logps/rejected": -0.9537283182144165, + "loss": 1.2961, + "rewards/accuracies": 0.5859375, + "rewards/chosen": -1.6270685195922852, + "rewards/margins": 0.2803882658481598, + "rewards/rejected": -1.907456636428833, + "step": 225 + }, + { + "epoch": 0.4778328377931946, + "grad_norm": 1.709757924079895, + "learning_rate": 6.246319871684047e-07, + "logits/chosen": -0.7573816776275635, + "logits/rejected": -0.8028420209884644, + "logps/chosen": -0.891952633857727, + "logps/rejected": -1.0168029069900513, + "loss": 1.333, + "rewards/accuracies": 0.6015625, + "rewards/chosen": -1.783905267715454, + "rewards/margins": 0.24970072507858276, + "rewards/rejected": -2.0336058139801025, + "step": 226 + }, + { + "epoch": 0.47994714238519987, + "grad_norm": 2.170957326889038, + "learning_rate": 6.210408275519734e-07, + "logits/chosen": -0.6915597915649414, + "logits/rejected": -0.7027997970581055, + "logps/chosen": -0.9063036441802979, + "logps/rejected": -1.0104373693466187, + "loss": 1.3388, + "rewards/accuracies": 0.5390625, + "rewards/chosen": -1.8126072883605957, + "rewards/margins": 0.20826762914657593, + "rewards/rejected": -2.0208747386932373, + "step": 227 + }, + { + "epoch": 0.48206144697720515, + "grad_norm": 1.8802261352539062, + "learning_rate": 6.174430228904919e-07, + "logits/chosen": -0.689726710319519, + "logits/rejected": -0.7143282890319824, + "logps/chosen": -0.7480812072753906, + "logps/rejected": -0.8698041439056396, + "loss": 1.2836, + "rewards/accuracies": 0.6171875, + "rewards/chosen": -1.4961624145507812, + "rewards/margins": 0.24344584345817566, + "rewards/rejected": -1.7396082878112793, + "step": 228 + }, + { + "epoch": 0.48417575156921044, + "grad_norm": 2.5202934741973877, + "learning_rate": 6.13838770700571e-07, + "logits/chosen": -0.6858299374580383, + "logits/rejected": -0.7115206122398376, + "logps/chosen": -0.8575515151023865, + "logps/rejected": -0.9657347202301025, + "loss": 1.3046, + "rewards/accuracies": 0.546875, + "rewards/chosen": -1.715103030204773, + "rewards/margins": 0.21636635065078735, + "rewards/rejected": -1.931469440460205, + "step": 229 + }, + { + "epoch": 0.48629005616121573, + "grad_norm": 1.268512487411499, + "learning_rate": 6.102282688527859e-07, + "logits/chosen": -0.7078689932823181, + "logits/rejected": -0.7254161238670349, + "logps/chosen": -0.8850880861282349, + "logps/rejected": -1.031385898590088, + "loss": 1.2816, + "rewards/accuracies": 0.625, + "rewards/chosen": -1.7701761722564697, + "rewards/margins": 0.29259535670280457, + "rewards/rejected": -2.062771797180176, + "step": 230 + }, + { + "epoch": 0.488404360753221, + "grad_norm": 1.7285584211349487, + "learning_rate": 6.066117155608135e-07, + "logits/chosen": -0.7325868606567383, + "logits/rejected": -0.7433226108551025, + "logps/chosen": -0.8014956116676331, + "logps/rejected": -0.9653260111808777, + "loss": 1.2429, + "rewards/accuracies": 0.6328125, + "rewards/chosen": -1.6029912233352661, + "rewards/margins": 0.32766085863113403, + "rewards/rejected": -1.9306520223617554, + "step": 231 + }, + { + "epoch": 0.4905186653452263, + "grad_norm": 0.6270304322242737, + "learning_rate": 6.029893093705491e-07, + "logits/chosen": -0.692166805267334, + "logits/rejected": -0.6799293756484985, + "logps/chosen": -0.7850213646888733, + "logps/rejected": -0.8839574456214905, + "loss": 1.2967, + "rewards/accuracies": 0.640625, + "rewards/chosen": -1.5700427293777466, + "rewards/margins": 0.19787229597568512, + "rewards/rejected": -1.767914891242981, + "step": 232 + }, + { + "epoch": 0.4926329699372316, + "grad_norm": 1.0160484313964844, + "learning_rate": 5.993612491492087e-07, + "logits/chosen": -0.7095844149589539, + "logits/rejected": -0.71524578332901, + "logps/chosen": -0.7063854336738586, + "logps/rejected": -0.8855549097061157, + "loss": 1.2176, + "rewards/accuracies": 0.6328125, + "rewards/chosen": -1.4127708673477173, + "rewards/margins": 0.3583390712738037, + "rewards/rejected": -1.7711098194122314, + "step": 233 + }, + { + "epoch": 0.4947472745292369, + "grad_norm": 2.225841999053955, + "learning_rate": 5.957277340744094e-07, + "logits/chosen": -0.7488946318626404, + "logits/rejected": -0.7588428854942322, + "logps/chosen": -0.9203822612762451, + "logps/rejected": -1.0089298486709595, + "loss": 1.355, + "rewards/accuracies": 0.5390625, + "rewards/chosen": -1.8407645225524902, + "rewards/margins": 0.17709502577781677, + "rewards/rejected": -2.017859697341919, + "step": 234 + }, + { + "epoch": 0.4968615791212422, + "grad_norm": 1.9577795267105103, + "learning_rate": 5.920889636232351e-07, + "logits/chosen": -0.8078997731208801, + "logits/rejected": -0.8064825534820557, + "logps/chosen": -0.8004480004310608, + "logps/rejected": -0.9856831431388855, + "loss": 1.2273, + "rewards/accuracies": 0.6640625, + "rewards/chosen": -1.6008960008621216, + "rewards/margins": 0.3704703152179718, + "rewards/rejected": -1.971366286277771, + "step": 235 + }, + { + "epoch": 0.4989758837132474, + "grad_norm": 2.5050246715545654, + "learning_rate": 5.884451375612865e-07, + "logits/chosen": -0.7499472498893738, + "logits/rejected": -0.7421904802322388, + "logps/chosen": -0.8363584876060486, + "logps/rejected": -0.9543781876564026, + "loss": 1.3002, + "rewards/accuracies": 0.5390625, + "rewards/chosen": -1.6727169752120972, + "rewards/margins": 0.23603934049606323, + "rewards/rejected": -1.9087563753128052, + "step": 236 + }, + { + "epoch": 0.5010901883052528, + "grad_norm": 0.585436224937439, + "learning_rate": 5.847964559317128e-07, + "logits/chosen": -0.730015218257904, + "logits/rejected": -0.7154791355133057, + "logps/chosen": -0.8828849196434021, + "logps/rejected": -0.9897070527076721, + "loss": 1.347, + "rewards/accuracies": 0.578125, + "rewards/chosen": -1.7657698392868042, + "rewards/margins": 0.21364440023899078, + "rewards/rejected": -1.9794141054153442, + "step": 237 + }, + { + "epoch": 0.503204492897258, + "grad_norm": 0.9204092621803284, + "learning_rate": 5.8114311904423e-07, + "logits/chosen": -0.759974479675293, + "logits/rejected": -0.7793674468994141, + "logps/chosen": -0.8321584463119507, + "logps/rejected": -1.0809751749038696, + "loss": 1.2185, + "rewards/accuracies": 0.6875, + "rewards/chosen": -1.6643168926239014, + "rewards/margins": 0.4976334273815155, + "rewards/rejected": -2.1619503498077393, + "step": 238 + }, + { + "epoch": 0.5053187974892633, + "grad_norm": 5.147011756896973, + "learning_rate": 5.774853274641243e-07, + "logits/chosen": -0.7148956060409546, + "logits/rejected": -0.7363921403884888, + "logps/chosen": -0.8623124361038208, + "logps/rejected": -1.0681498050689697, + "loss": 1.2353, + "rewards/accuracies": 0.6484375, + "rewards/chosen": -1.7246248722076416, + "rewards/margins": 0.4116746187210083, + "rewards/rejected": -2.1362996101379395, + "step": 239 + }, + { + "epoch": 0.5074331020812686, + "grad_norm": 1.9065529108047485, + "learning_rate": 5.738232820012407e-07, + "logits/chosen": -0.7158540487289429, + "logits/rejected": -0.7083900570869446, + "logps/chosen": -0.981558620929718, + "logps/rejected": -1.054612636566162, + "loss": 1.3594, + "rewards/accuracies": 0.59375, + "rewards/chosen": -1.963117241859436, + "rewards/margins": 0.14610806107521057, + "rewards/rejected": -2.109225273132324, + "step": 240 + }, + { + "epoch": 0.5095474066732739, + "grad_norm": 2.4411256313323975, + "learning_rate": 5.701571836989591e-07, + "logits/chosen": -0.8441444039344788, + "logits/rejected": -0.8529233336448669, + "logps/chosen": -0.8665949702262878, + "logps/rejected": -1.030572772026062, + "loss": 1.2477, + "rewards/accuracies": 0.671875, + "rewards/chosen": -1.7331899404525757, + "rewards/margins": 0.3279556334018707, + "rewards/rejected": -2.061145544052124, + "step": 241 + }, + { + "epoch": 0.5116617112652792, + "grad_norm": 2.461113214492798, + "learning_rate": 5.664872338231571e-07, + "logits/chosen": -0.7463312149047852, + "logits/rejected": -0.7725105285644531, + "logps/chosen": -0.9185941815376282, + "logps/rejected": -1.1244423389434814, + "loss": 1.2404, + "rewards/accuracies": 0.6328125, + "rewards/chosen": -1.8371883630752563, + "rewards/margins": 0.411696195602417, + "rewards/rejected": -2.248884677886963, + "step": 242 + }, + { + "epoch": 0.5137760158572844, + "grad_norm": 3.5861761569976807, + "learning_rate": 5.628136338511607e-07, + "logits/chosen": -0.8432914018630981, + "logits/rejected": -0.85801100730896, + "logps/chosen": -0.8873915672302246, + "logps/rejected": -1.0090795755386353, + "loss": 1.3072, + "rewards/accuracies": 0.609375, + "rewards/chosen": -1.7747831344604492, + "rewards/margins": 0.24337637424468994, + "rewards/rejected": -2.0181591510772705, + "step": 243 + }, + { + "epoch": 0.5158903204492897, + "grad_norm": 2.109071969985962, + "learning_rate": 5.591365854606829e-07, + "logits/chosen": -0.7899532318115234, + "logits/rejected": -0.7548331618309021, + "logps/chosen": -0.9333330392837524, + "logps/rejected": -1.00949227809906, + "loss": 1.3749, + "rewards/accuracies": 0.59375, + "rewards/chosen": -1.8666660785675049, + "rewards/margins": 0.1523183286190033, + "rewards/rejected": -2.01898455619812, + "step": 244 + }, + { + "epoch": 0.518004625041295, + "grad_norm": 2.2017955780029297, + "learning_rate": 5.554562905187527e-07, + "logits/chosen": -0.7569047212600708, + "logits/rejected": -0.7679808735847473, + "logps/chosen": -0.9779613614082336, + "logps/rejected": -1.1713427305221558, + "loss": 1.2628, + "rewards/accuracies": 0.609375, + "rewards/chosen": -1.9559227228164673, + "rewards/margins": 0.3867628276348114, + "rewards/rejected": -2.3426854610443115, + "step": 245 + }, + { + "epoch": 0.5201189296333003, + "grad_norm": 4.651991367340088, + "learning_rate": 5.517729510706315e-07, + "logits/chosen": -0.8546395301818848, + "logits/rejected": -0.8609369397163391, + "logps/chosen": -0.9926605224609375, + "logps/rejected": -1.1553713083267212, + "loss": 1.2812, + "rewards/accuracies": 0.6171875, + "rewards/chosen": -1.985321044921875, + "rewards/margins": 0.32542160153388977, + "rewards/rejected": -2.3107426166534424, + "step": 246 + }, + { + "epoch": 0.5222332342253055, + "grad_norm": 2.6384060382843018, + "learning_rate": 5.480867693287223e-07, + "logits/chosen": -0.7734386324882507, + "logits/rejected": -0.7963250875473022, + "logps/chosen": -0.8996341824531555, + "logps/rejected": -1.0466523170471191, + "loss": 1.2849, + "rewards/accuracies": 0.578125, + "rewards/chosen": -1.799268364906311, + "rewards/margins": 0.2940361201763153, + "rewards/rejected": -2.0933046340942383, + "step": 247 + }, + { + "epoch": 0.5243475388173109, + "grad_norm": 1.3608977794647217, + "learning_rate": 5.443979476614674e-07, + "logits/chosen": -0.7350472807884216, + "logits/rejected": -0.7215992212295532, + "logps/chosen": -0.8887076377868652, + "logps/rejected": -1.0147045850753784, + "loss": 1.3182, + "rewards/accuracies": 0.6171875, + "rewards/chosen": -1.7774152755737305, + "rewards/margins": 0.25199398398399353, + "rewards/rejected": -2.029409170150757, + "step": 248 + }, + { + "epoch": 0.5264618434093161, + "grad_norm": 3.017115354537964, + "learning_rate": 5.407066885822391e-07, + "logits/chosen": -0.827782154083252, + "logits/rejected": -0.8471929430961609, + "logps/chosen": -0.9262440800666809, + "logps/rejected": -1.1658306121826172, + "loss": 1.1882, + "rewards/accuracies": 0.6640625, + "rewards/chosen": -1.8524881601333618, + "rewards/margins": 0.47917306423187256, + "rewards/rejected": -2.3316612243652344, + "step": 249 + }, + { + "epoch": 0.5285761480013215, + "grad_norm": 0.7805312275886536, + "learning_rate": 5.370131947382214e-07, + "logits/chosen": -0.7815499305725098, + "logits/rejected": -0.8279274702072144, + "logps/chosen": -0.968708872795105, + "logps/rejected": -1.2697322368621826, + "loss": 1.2092, + "rewards/accuracies": 0.671875, + "rewards/chosen": -1.93741774559021, + "rewards/margins": 0.6020466685295105, + "rewards/rejected": -2.5394644737243652, + "step": 250 + }, + { + "epoch": 0.5306904525933267, + "grad_norm": 2.229363441467285, + "learning_rate": 5.333176688992855e-07, + "logits/chosen": -0.7824153900146484, + "logits/rejected": -0.8154900074005127, + "logps/chosen": -1.0211957693099976, + "logps/rejected": -1.2145965099334717, + "loss": 1.3074, + "rewards/accuracies": 0.609375, + "rewards/chosen": -2.042391538619995, + "rewards/margins": 0.3868010938167572, + "rewards/rejected": -2.4291930198669434, + "step": 251 + }, + { + "epoch": 0.532804757185332, + "grad_norm": 1.1359837055206299, + "learning_rate": 5.296203139468571e-07, + "logits/chosen": -0.7467613220214844, + "logits/rejected": -0.7548531889915466, + "logps/chosen": -1.0614902973175049, + "logps/rejected": -1.2674376964569092, + "loss": 1.2512, + "rewards/accuracies": 0.578125, + "rewards/chosen": -2.1229805946350098, + "rewards/margins": 0.4118950664997101, + "rewards/rejected": -2.5348753929138184, + "step": 252 + }, + { + "epoch": 0.5349190617773373, + "grad_norm": 3.0548548698425293, + "learning_rate": 5.259213328627792e-07, + "logits/chosen": -0.7868636250495911, + "logits/rejected": -0.8130850791931152, + "logps/chosen": -1.0743666887283325, + "logps/rejected": -1.2010191679000854, + "loss": 1.3275, + "rewards/accuracies": 0.578125, + "rewards/chosen": -2.148733377456665, + "rewards/margins": 0.2533051669597626, + "rewards/rejected": -2.402038335800171, + "step": 253 + }, + { + "epoch": 0.5370333663693426, + "grad_norm": 1.7205246686935425, + "learning_rate": 5.222209287181676e-07, + "logits/chosen": -0.81404709815979, + "logits/rejected": -0.8481613397598267, + "logps/chosen": -1.1599587202072144, + "logps/rejected": -1.4234716892242432, + "loss": 1.2894, + "rewards/accuracies": 0.6015625, + "rewards/chosen": -2.3199174404144287, + "rewards/margins": 0.5270256400108337, + "rewards/rejected": -2.8469433784484863, + "step": 254 + }, + { + "epoch": 0.5391476709613479, + "grad_norm": 2.2516112327575684, + "learning_rate": 5.185193046622634e-07, + "logits/chosen": -0.8112510442733765, + "logits/rejected": -0.8310728073120117, + "logps/chosen": -1.1263186931610107, + "logps/rejected": -1.3256827592849731, + "loss": 1.3552, + "rewards/accuracies": 0.5390625, + "rewards/chosen": -2.2526373863220215, + "rewards/margins": 0.39872825145721436, + "rewards/rejected": -2.6513655185699463, + "step": 255 + }, + { + "epoch": 0.5412619755533532, + "grad_norm": 2.8379359245300293, + "learning_rate": 5.148166639112799e-07, + "logits/chosen": -0.8202102184295654, + "logits/rejected": -0.845209002494812, + "logps/chosen": -1.264180302619934, + "logps/rejected": -1.6190590858459473, + "loss": 1.2083, + "rewards/accuracies": 0.625, + "rewards/chosen": -2.528360605239868, + "rewards/margins": 0.7097575068473816, + "rewards/rejected": -3.2381181716918945, + "step": 256 + } + ], + "logging_steps": 1, + "max_steps": 472, + "num_input_tokens_seen": 0, + "num_train_epochs": 1, + "save_steps": 64, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 0.0, + "train_batch_size": 2, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-320/config.json b/checkpoint-320/config.json new file mode 100644 index 0000000000000000000000000000000000000000..ead276a9e0bbd4a0baae360715d8e80853974eb6 --- /dev/null +++ b/checkpoint-320/config.json @@ -0,0 +1,40 @@ +{ + "_name_or_path": "/home/v-liuzichuan/huggingface/Llama-3.2-3B-Instruct", + "architectures": [ + "LlamaForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "bos_token_id": 128000, + "eos_token_id": [ + 128001, + 128008, + 128009 + ], + "head_dim": 128, + "hidden_act": "silu", + "hidden_size": 3072, + "initializer_range": 0.02, + "intermediate_size": 8192, + "max_position_embeddings": 131072, + "mlp_bias": false, + "model_type": "llama", + "num_attention_heads": 24, + "num_hidden_layers": 28, + "num_key_value_heads": 8, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": { + "factor": 32.0, + "high_freq_factor": 4.0, + "low_freq_factor": 1.0, + "original_max_position_embeddings": 8192, + "rope_type": "llama3" + }, + "rope_theta": 500000.0, + "tie_word_embeddings": true, + "torch_dtype": "bfloat16", + "transformers_version": "4.45.2", + "use_cache": false, + "vocab_size": 128256 +} diff --git a/checkpoint-320/generation_config.json b/checkpoint-320/generation_config.json new file mode 100644 index 0000000000000000000000000000000000000000..b18d16b4cc3c799dd78e854fd824ddf8a2996285 --- /dev/null +++ b/checkpoint-320/generation_config.json @@ -0,0 +1,12 @@ +{ + "bos_token_id": 128000, + "do_sample": true, + "eos_token_id": [ + 128001, + 128008, + 128009 + ], + "temperature": 0.6, + "top_p": 0.9, + "transformers_version": "4.45.2" +} diff --git a/checkpoint-320/model-00001-of-00002.safetensors b/checkpoint-320/model-00001-of-00002.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..1cbc259cc3f44ab7354a25c1b79a605eb475d0e7 --- /dev/null +++ b/checkpoint-320/model-00001-of-00002.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:89bf8fd766ede1249d0f9602fe70fc459781b09af249bf6066a16b8ae82e415f +size 4965799096 diff --git a/checkpoint-320/model-00002-of-00002.safetensors b/checkpoint-320/model-00002-of-00002.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..775ad735b04fbd86f63cdd0639c17c45a6bfeabf --- /dev/null +++ b/checkpoint-320/model-00002-of-00002.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a2b6b8f7379ca5f2205b273d5a342bbe0f3a79dd1a5b5d0ffcd71b488f8a7196 +size 2247734992 diff --git a/checkpoint-320/model.safetensors.index.json b/checkpoint-320/model.safetensors.index.json new file mode 100644 index 0000000000000000000000000000000000000000..ed64de846d720b9a7859dc20575fea8e8ca51940 --- /dev/null +++ b/checkpoint-320/model.safetensors.index.json @@ -0,0 +1,262 @@ +{ + "metadata": { + "total_size": 7213504512 + }, + "weight_map": { + "lm_head.weight": "model-00002-of-00002.safetensors", + "model.embed_tokens.weight": "model-00001-of-00002.safetensors", + "model.layers.0.input_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.0.mlp.down_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.0.mlp.gate_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.0.mlp.up_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.0.post_attention_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.0.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.0.self_attn.o_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.0.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.0.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.1.input_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.1.mlp.down_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.1.mlp.gate_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.1.mlp.up_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.1.post_attention_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.1.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.1.self_attn.o_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.1.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.1.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.10.input_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.10.mlp.down_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.10.mlp.gate_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.10.mlp.up_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.10.post_attention_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.10.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.10.self_attn.o_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.10.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.10.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.11.input_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.11.mlp.down_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.11.mlp.gate_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.11.mlp.up_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.11.post_attention_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.11.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.11.self_attn.o_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.11.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.11.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.12.input_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.12.mlp.down_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.12.mlp.gate_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.12.mlp.up_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.12.post_attention_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.12.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.12.self_attn.o_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.12.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.12.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.13.input_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.13.mlp.down_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.13.mlp.gate_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.13.mlp.up_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.13.post_attention_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.13.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.13.self_attn.o_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.13.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.13.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.14.input_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.14.mlp.down_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.14.mlp.gate_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.14.mlp.up_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.14.post_attention_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.14.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.14.self_attn.o_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.14.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.14.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.15.input_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.15.mlp.down_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.15.mlp.gate_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.15.mlp.up_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.15.post_attention_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.15.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.15.self_attn.o_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.15.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.15.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.16.input_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.16.mlp.down_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.16.mlp.gate_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.16.mlp.up_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.16.post_attention_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.16.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.16.self_attn.o_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.16.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.16.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.17.input_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.17.mlp.down_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.17.mlp.gate_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.17.mlp.up_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.17.post_attention_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.17.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.17.self_attn.o_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.17.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.17.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.18.input_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.18.mlp.down_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.18.mlp.gate_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.18.mlp.up_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.18.post_attention_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.18.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.18.self_attn.o_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.18.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.18.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.19.input_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.19.mlp.down_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.19.mlp.gate_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.19.mlp.up_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.19.post_attention_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.19.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.19.self_attn.o_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.19.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.19.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.2.input_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.2.mlp.down_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.2.mlp.gate_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.2.mlp.up_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.2.post_attention_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.2.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.2.self_attn.o_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.2.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.2.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.20.input_layernorm.weight": "model-00002-of-00002.safetensors", + "model.layers.20.mlp.down_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.20.mlp.gate_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.20.mlp.up_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.20.post_attention_layernorm.weight": "model-00002-of-00002.safetensors", + "model.layers.20.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.20.self_attn.o_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.20.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.20.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.21.input_layernorm.weight": "model-00002-of-00002.safetensors", + "model.layers.21.mlp.down_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.21.mlp.gate_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.21.mlp.up_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.21.post_attention_layernorm.weight": "model-00002-of-00002.safetensors", + "model.layers.21.self_attn.k_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.21.self_attn.o_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.21.self_attn.q_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.21.self_attn.v_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.22.input_layernorm.weight": "model-00002-of-00002.safetensors", + "model.layers.22.mlp.down_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.22.mlp.gate_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.22.mlp.up_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.22.post_attention_layernorm.weight": "model-00002-of-00002.safetensors", + "model.layers.22.self_attn.k_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.22.self_attn.o_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.22.self_attn.q_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.22.self_attn.v_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.23.input_layernorm.weight": "model-00002-of-00002.safetensors", + "model.layers.23.mlp.down_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.23.mlp.gate_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.23.mlp.up_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.23.post_attention_layernorm.weight": "model-00002-of-00002.safetensors", + "model.layers.23.self_attn.k_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.23.self_attn.o_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.23.self_attn.q_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.23.self_attn.v_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.24.input_layernorm.weight": "model-00002-of-00002.safetensors", + "model.layers.24.mlp.down_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.24.mlp.gate_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.24.mlp.up_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.24.post_attention_layernorm.weight": "model-00002-of-00002.safetensors", + "model.layers.24.self_attn.k_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.24.self_attn.o_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.24.self_attn.q_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.24.self_attn.v_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.25.input_layernorm.weight": "model-00002-of-00002.safetensors", + "model.layers.25.mlp.down_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.25.mlp.gate_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.25.mlp.up_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.25.post_attention_layernorm.weight": "model-00002-of-00002.safetensors", + "model.layers.25.self_attn.k_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.25.self_attn.o_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.25.self_attn.q_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.25.self_attn.v_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.26.input_layernorm.weight": "model-00002-of-00002.safetensors", + "model.layers.26.mlp.down_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.26.mlp.gate_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.26.mlp.up_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.26.post_attention_layernorm.weight": "model-00002-of-00002.safetensors", + "model.layers.26.self_attn.k_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.26.self_attn.o_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.26.self_attn.q_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.26.self_attn.v_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.27.input_layernorm.weight": "model-00002-of-00002.safetensors", + "model.layers.27.mlp.down_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.27.mlp.gate_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.27.mlp.up_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.27.post_attention_layernorm.weight": "model-00002-of-00002.safetensors", + "model.layers.27.self_attn.k_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.27.self_attn.o_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.27.self_attn.q_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.27.self_attn.v_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.3.input_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.3.mlp.down_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.3.mlp.gate_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.3.mlp.up_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.3.post_attention_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.3.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.3.self_attn.o_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.3.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.3.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.4.input_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.4.mlp.down_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.4.mlp.gate_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.4.mlp.up_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.4.post_attention_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.4.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.4.self_attn.o_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.4.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.4.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.5.input_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.5.mlp.down_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.5.mlp.gate_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.5.mlp.up_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.5.post_attention_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.5.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.5.self_attn.o_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.5.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.5.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.6.input_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.6.mlp.down_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.6.mlp.gate_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.6.mlp.up_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.6.post_attention_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.6.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.6.self_attn.o_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.6.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.6.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.7.input_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.7.mlp.down_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.7.mlp.gate_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.7.mlp.up_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.7.post_attention_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.7.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.7.self_attn.o_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.7.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.7.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.8.input_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.8.mlp.down_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.8.mlp.gate_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.8.mlp.up_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.8.post_attention_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.8.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.8.self_attn.o_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.8.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.8.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.9.input_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.9.mlp.down_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.9.mlp.gate_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.9.mlp.up_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.9.post_attention_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.9.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.9.self_attn.o_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.9.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.9.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.norm.weight": "model-00002-of-00002.safetensors" + } +} diff --git a/checkpoint-320/special_tokens_map.json b/checkpoint-320/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..b43be96621d147110fb8a18b5776ec6e38516127 --- /dev/null +++ b/checkpoint-320/special_tokens_map.json @@ -0,0 +1,17 @@ +{ + "bos_token": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": "<|eot_id|>" +} diff --git a/checkpoint-320/tokenizer.json b/checkpoint-320/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..1c1d8d5c9024994f1d3b00f9662b8dd89ca13cf2 --- /dev/null +++ b/checkpoint-320/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6b9e4e7fb171f92fd137b777cc2714bf87d11576700a1dcd7a399e7bbe39537b +size 17209920 diff --git a/checkpoint-320/tokenizer_config.json b/checkpoint-320/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..e3cdb7bfcc59fd293867d98e124a30b3ddf39c93 --- /dev/null +++ b/checkpoint-320/tokenizer_config.json @@ -0,0 +1,2063 @@ +{ + "added_tokens_decoder": { + "128000": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128001": { + "content": "<|end_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128002": { + "content": "<|reserved_special_token_0|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128003": { + "content": "<|reserved_special_token_1|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128004": { + "content": "<|finetune_right_pad_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128005": { + "content": "<|reserved_special_token_2|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128006": { + "content": "<|start_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128007": { + "content": "<|end_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128008": { + "content": "<|eom_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128009": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128010": { + "content": "<|python_tag|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128011": { + "content": "<|reserved_special_token_3|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128012": { + "content": "<|reserved_special_token_4|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128013": { + "content": "<|reserved_special_token_5|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128014": { + "content": "<|reserved_special_token_6|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128015": { + "content": "<|reserved_special_token_7|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128016": { + "content": "<|reserved_special_token_8|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128017": { + "content": "<|reserved_special_token_9|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128018": { + "content": "<|reserved_special_token_10|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128019": { + "content": "<|reserved_special_token_11|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128020": { + "content": "<|reserved_special_token_12|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128021": { + "content": "<|reserved_special_token_13|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128022": { + "content": "<|reserved_special_token_14|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128023": { + "content": "<|reserved_special_token_15|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128024": { + "content": "<|reserved_special_token_16|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128025": { + "content": "<|reserved_special_token_17|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128026": { + "content": "<|reserved_special_token_18|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128027": { + "content": "<|reserved_special_token_19|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128028": { + "content": "<|reserved_special_token_20|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128029": { + "content": "<|reserved_special_token_21|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128030": { + "content": "<|reserved_special_token_22|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128031": { + "content": "<|reserved_special_token_23|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128032": { + "content": "<|reserved_special_token_24|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128033": { + "content": "<|reserved_special_token_25|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128034": { + "content": "<|reserved_special_token_26|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128035": { + "content": "<|reserved_special_token_27|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128036": { + "content": "<|reserved_special_token_28|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128037": { + "content": "<|reserved_special_token_29|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128038": { + "content": "<|reserved_special_token_30|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128039": { + "content": "<|reserved_special_token_31|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128040": { + "content": "<|reserved_special_token_32|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128041": { + "content": "<|reserved_special_token_33|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128042": { + "content": "<|reserved_special_token_34|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128043": { + "content": "<|reserved_special_token_35|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128044": { + "content": "<|reserved_special_token_36|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128045": { + "content": "<|reserved_special_token_37|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128046": { + "content": "<|reserved_special_token_38|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128047": { + "content": "<|reserved_special_token_39|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128048": { + "content": "<|reserved_special_token_40|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128049": { + "content": "<|reserved_special_token_41|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128050": { + "content": "<|reserved_special_token_42|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128051": { + "content": "<|reserved_special_token_43|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128052": { + "content": "<|reserved_special_token_44|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128053": { + "content": "<|reserved_special_token_45|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128054": { + "content": "<|reserved_special_token_46|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128055": { + "content": "<|reserved_special_token_47|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128056": { + "content": "<|reserved_special_token_48|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128057": { + "content": "<|reserved_special_token_49|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128058": { + "content": "<|reserved_special_token_50|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128059": { + "content": "<|reserved_special_token_51|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128060": { + "content": "<|reserved_special_token_52|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128061": { + "content": "<|reserved_special_token_53|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128062": { + "content": "<|reserved_special_token_54|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128063": { + "content": "<|reserved_special_token_55|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128064": { + "content": "<|reserved_special_token_56|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128065": { + "content": "<|reserved_special_token_57|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128066": { + "content": "<|reserved_special_token_58|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128067": { + "content": "<|reserved_special_token_59|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128068": { + "content": "<|reserved_special_token_60|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128069": { + "content": "<|reserved_special_token_61|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128070": { + "content": "<|reserved_special_token_62|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128071": { + "content": "<|reserved_special_token_63|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128072": { + "content": "<|reserved_special_token_64|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128073": { + "content": "<|reserved_special_token_65|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128074": { + "content": "<|reserved_special_token_66|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128075": { + "content": "<|reserved_special_token_67|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128076": { + "content": "<|reserved_special_token_68|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128077": { + "content": "<|reserved_special_token_69|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128078": { + "content": "<|reserved_special_token_70|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128079": { + "content": "<|reserved_special_token_71|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128080": { + "content": "<|reserved_special_token_72|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128081": { + "content": "<|reserved_special_token_73|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128082": { + "content": "<|reserved_special_token_74|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128083": { + "content": "<|reserved_special_token_75|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128084": { + "content": "<|reserved_special_token_76|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128085": { + "content": "<|reserved_special_token_77|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128086": { + "content": "<|reserved_special_token_78|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128087": { + "content": "<|reserved_special_token_79|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128088": { + "content": "<|reserved_special_token_80|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128089": { + "content": "<|reserved_special_token_81|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128090": { + "content": "<|reserved_special_token_82|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128091": { + "content": "<|reserved_special_token_83|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128092": { + "content": "<|reserved_special_token_84|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128093": { + "content": "<|reserved_special_token_85|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128094": { + "content": "<|reserved_special_token_86|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128095": { + "content": "<|reserved_special_token_87|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128096": { + "content": "<|reserved_special_token_88|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128097": { + "content": "<|reserved_special_token_89|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128098": { + "content": "<|reserved_special_token_90|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128099": { + "content": "<|reserved_special_token_91|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128100": { + "content": "<|reserved_special_token_92|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128101": { + "content": "<|reserved_special_token_93|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128102": { + "content": "<|reserved_special_token_94|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128103": { + "content": "<|reserved_special_token_95|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128104": { + "content": "<|reserved_special_token_96|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128105": { + "content": "<|reserved_special_token_97|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128106": { + "content": "<|reserved_special_token_98|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128107": { + "content": "<|reserved_special_token_99|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128108": { + "content": "<|reserved_special_token_100|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128109": { + "content": "<|reserved_special_token_101|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128110": { + "content": "<|reserved_special_token_102|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128111": { + "content": "<|reserved_special_token_103|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128112": { + "content": "<|reserved_special_token_104|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128113": { + "content": "<|reserved_special_token_105|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128114": { + "content": "<|reserved_special_token_106|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128115": { + "content": "<|reserved_special_token_107|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128116": { + "content": "<|reserved_special_token_108|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128117": { + "content": "<|reserved_special_token_109|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128118": { + "content": "<|reserved_special_token_110|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128119": { + "content": "<|reserved_special_token_111|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128120": { + "content": "<|reserved_special_token_112|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128121": { + "content": "<|reserved_special_token_113|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128122": { + "content": "<|reserved_special_token_114|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128123": { + "content": "<|reserved_special_token_115|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128124": { + "content": "<|reserved_special_token_116|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128125": { + "content": "<|reserved_special_token_117|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128126": { + "content": "<|reserved_special_token_118|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128127": { + "content": "<|reserved_special_token_119|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128128": { + "content": "<|reserved_special_token_120|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128129": { + "content": "<|reserved_special_token_121|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128130": { + "content": "<|reserved_special_token_122|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128131": { + "content": "<|reserved_special_token_123|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128132": { + "content": "<|reserved_special_token_124|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128133": { + "content": "<|reserved_special_token_125|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128134": { + "content": "<|reserved_special_token_126|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128135": { + "content": "<|reserved_special_token_127|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128136": { + "content": "<|reserved_special_token_128|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128137": { + "content": "<|reserved_special_token_129|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128138": { + "content": "<|reserved_special_token_130|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128139": { + "content": "<|reserved_special_token_131|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128140": { + "content": "<|reserved_special_token_132|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128141": { + "content": "<|reserved_special_token_133|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128142": { + "content": "<|reserved_special_token_134|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128143": { + "content": "<|reserved_special_token_135|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128144": { + "content": "<|reserved_special_token_136|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128145": { + "content": "<|reserved_special_token_137|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128146": { + "content": "<|reserved_special_token_138|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128147": { + "content": "<|reserved_special_token_139|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128148": { + "content": "<|reserved_special_token_140|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128149": { + "content": "<|reserved_special_token_141|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128150": { + "content": "<|reserved_special_token_142|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128151": { + "content": "<|reserved_special_token_143|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128152": { + "content": "<|reserved_special_token_144|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128153": { + "content": "<|reserved_special_token_145|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128154": { + "content": "<|reserved_special_token_146|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128155": { + "content": "<|reserved_special_token_147|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128156": { + "content": "<|reserved_special_token_148|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128157": { + "content": "<|reserved_special_token_149|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128158": { + "content": "<|reserved_special_token_150|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128159": { + "content": "<|reserved_special_token_151|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128160": { + "content": "<|reserved_special_token_152|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128161": { + "content": "<|reserved_special_token_153|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128162": { + "content": "<|reserved_special_token_154|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128163": { + "content": "<|reserved_special_token_155|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128164": { + "content": "<|reserved_special_token_156|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128165": { + "content": "<|reserved_special_token_157|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128166": { + "content": "<|reserved_special_token_158|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128167": { + "content": "<|reserved_special_token_159|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128168": { + "content": "<|reserved_special_token_160|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128169": { + "content": "<|reserved_special_token_161|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128170": { + "content": "<|reserved_special_token_162|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128171": { + "content": "<|reserved_special_token_163|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128172": { + "content": "<|reserved_special_token_164|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128173": { + "content": "<|reserved_special_token_165|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128174": { + "content": "<|reserved_special_token_166|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128175": { + "content": "<|reserved_special_token_167|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128176": { + "content": "<|reserved_special_token_168|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128177": { + "content": "<|reserved_special_token_169|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128178": { + "content": "<|reserved_special_token_170|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128179": { + "content": "<|reserved_special_token_171|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128180": { + "content": "<|reserved_special_token_172|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128181": { + "content": "<|reserved_special_token_173|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128182": { + "content": "<|reserved_special_token_174|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128183": { + "content": "<|reserved_special_token_175|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128184": { + "content": "<|reserved_special_token_176|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128185": { + "content": "<|reserved_special_token_177|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128186": { + "content": "<|reserved_special_token_178|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128187": { + "content": "<|reserved_special_token_179|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128188": { + "content": "<|reserved_special_token_180|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128189": { + "content": "<|reserved_special_token_181|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128190": { + "content": "<|reserved_special_token_182|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128191": { + "content": "<|reserved_special_token_183|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128192": { + "content": "<|reserved_special_token_184|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128193": { + "content": "<|reserved_special_token_185|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128194": { + "content": "<|reserved_special_token_186|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128195": { + "content": "<|reserved_special_token_187|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128196": { + "content": "<|reserved_special_token_188|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128197": { + "content": "<|reserved_special_token_189|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128198": { + "content": "<|reserved_special_token_190|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128199": { + "content": "<|reserved_special_token_191|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128200": { + "content": "<|reserved_special_token_192|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128201": { + "content": "<|reserved_special_token_193|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128202": { + "content": "<|reserved_special_token_194|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128203": { + "content": "<|reserved_special_token_195|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128204": { + "content": "<|reserved_special_token_196|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128205": { + "content": "<|reserved_special_token_197|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128206": { + "content": "<|reserved_special_token_198|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128207": { + "content": "<|reserved_special_token_199|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128208": { + "content": "<|reserved_special_token_200|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128209": { + "content": "<|reserved_special_token_201|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128210": { + "content": "<|reserved_special_token_202|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128211": { + "content": "<|reserved_special_token_203|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128212": { + "content": "<|reserved_special_token_204|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128213": { + "content": "<|reserved_special_token_205|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128214": { + "content": "<|reserved_special_token_206|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128215": { + "content": "<|reserved_special_token_207|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128216": { + "content": "<|reserved_special_token_208|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128217": { + "content": "<|reserved_special_token_209|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128218": { + "content": "<|reserved_special_token_210|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128219": { + "content": "<|reserved_special_token_211|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128220": { + "content": "<|reserved_special_token_212|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128221": { + "content": "<|reserved_special_token_213|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128222": { + "content": "<|reserved_special_token_214|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128223": { + "content": "<|reserved_special_token_215|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128224": { + "content": "<|reserved_special_token_216|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128225": { + "content": "<|reserved_special_token_217|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128226": { + "content": "<|reserved_special_token_218|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128227": { + "content": "<|reserved_special_token_219|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128228": { + "content": "<|reserved_special_token_220|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128229": { + "content": "<|reserved_special_token_221|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128230": { + "content": "<|reserved_special_token_222|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128231": { + "content": "<|reserved_special_token_223|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128232": { + "content": "<|reserved_special_token_224|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128233": { + "content": "<|reserved_special_token_225|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128234": { + "content": "<|reserved_special_token_226|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128235": { + "content": "<|reserved_special_token_227|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128236": { + "content": "<|reserved_special_token_228|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128237": { + "content": "<|reserved_special_token_229|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128238": { + "content": "<|reserved_special_token_230|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128239": { + "content": "<|reserved_special_token_231|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128240": { + "content": "<|reserved_special_token_232|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128241": { + "content": "<|reserved_special_token_233|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128242": { + "content": "<|reserved_special_token_234|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128243": { + "content": "<|reserved_special_token_235|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128244": { + "content": "<|reserved_special_token_236|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128245": { + "content": "<|reserved_special_token_237|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128246": { + "content": "<|reserved_special_token_238|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128247": { + "content": "<|reserved_special_token_239|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128248": { + "content": "<|reserved_special_token_240|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128249": { + "content": "<|reserved_special_token_241|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128250": { + "content": "<|reserved_special_token_242|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128251": { + "content": "<|reserved_special_token_243|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128252": { + "content": "<|reserved_special_token_244|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128253": { + "content": "<|reserved_special_token_245|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128254": { + "content": "<|reserved_special_token_246|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128255": { + "content": "<|reserved_special_token_247|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + } + }, + "bos_token": "<|begin_of_text|>", + "chat_template": "{{- bos_token }}\n{%- if custom_tools is defined %}\n {%- set tools = custom_tools %}\n{%- endif %}\n{%- if not tools_in_user_message is defined %}\n {%- set tools_in_user_message = true %}\n{%- endif %}\n{%- if not date_string is defined %}\n {%- if strftime_now is defined %}\n {%- set date_string = strftime_now(\"%d %b %Y\") %}\n {%- else %}\n {%- set date_string = \"26 Jul 2024\" %}\n {%- endif %}\n{%- endif %}\n{%- if not tools is defined %}\n {%- set tools = none %}\n{%- endif %}\n\n{#- This block extracts the system message, so we can slot it into the right place. #}\n{%- if messages[0]['role'] == 'system' %}\n {%- set system_message = messages[0]['content']|trim %}\n {%- set messages = messages[1:] %}\n{%- else %}\n {%- set system_message = \"\" %}\n{%- endif %}\n\n{#- System message #}\n{{- \"<|start_header_id|>system<|end_header_id|>\\n\\n\" }}\n{%- if tools is not none %}\n {{- \"Environment: ipython\\n\" }}\n{%- endif %}\n{{- \"Cutting Knowledge Date: December 2023\\n\" }}\n{{- \"Today Date: \" + date_string + \"\\n\\n\" }}\n{%- if tools is not none and not tools_in_user_message %}\n {{- \"You have access to the following functions. To call a function, please respond with JSON for a function call.\" }}\n {{- 'Respond in the format {\"name\": function name, \"parameters\": dictionary of argument name and its value}.' }}\n {{- \"Do not use variables.\\n\\n\" }}\n {%- for t in tools %}\n {{- t | tojson(indent=4) }}\n {{- \"\\n\\n\" }}\n {%- endfor %}\n{%- endif %}\n{{- system_message }}\n{{- \"<|eot_id|>\" }}\n\n{#- Custom tools are passed in a user message with some extra guidance #}\n{%- if tools_in_user_message and not tools is none %}\n {#- Extract the first user message so we can plug it in here #}\n {%- if messages | length != 0 %}\n {%- set first_user_message = messages[0]['content']|trim %}\n {%- set messages = messages[1:] %}\n {%- else %}\n {{- raise_exception(\"Cannot put tools in the first user message when there's no first user message!\") }}\n{%- endif %}\n {{- '<|start_header_id|>user<|end_header_id|>\\n\\n' -}}\n {{- \"Given the following functions, please respond with a JSON for a function call \" }}\n {{- \"with its proper arguments that best answers the given prompt.\\n\\n\" }}\n {{- 'Respond in the format {\"name\": function name, \"parameters\": dictionary of argument name and its value}.' }}\n {{- \"Do not use variables.\\n\\n\" }}\n {%- for t in tools %}\n {{- t | tojson(indent=4) }}\n {{- \"\\n\\n\" }}\n {%- endfor %}\n {{- first_user_message + \"<|eot_id|>\"}}\n{%- endif %}\n\n{%- for message in messages %}\n {%- if not (message.role == 'ipython' or message.role == 'tool' or 'tool_calls' in message) %}\n {{- '<|start_header_id|>' + message['role'] + '<|end_header_id|>\\n\\n'+ message['content'] | trim + '<|eot_id|>' }}\n {%- elif 'tool_calls' in message %}\n {%- if not message.tool_calls|length == 1 %}\n {{- raise_exception(\"This model only supports single tool-calls at once!\") }}\n {%- endif %}\n {%- set tool_call = message.tool_calls[0].function %}\n {{- '<|start_header_id|>assistant<|end_header_id|>\\n\\n' -}}\n {{- '{\"name\": \"' + tool_call.name + '\", ' }}\n {{- '\"parameters\": ' }}\n {{- tool_call.arguments | tojson }}\n {{- \"}\" }}\n {{- \"<|eot_id|>\" }}\n {%- elif message.role == \"tool\" or message.role == \"ipython\" %}\n {{- \"<|start_header_id|>ipython<|end_header_id|>\\n\\n\" }}\n {%- if message.content is mapping or message.content is iterable %}\n {{- message.content | tojson }}\n {%- else %}\n {{- message.content }}\n {%- endif %}\n {{- \"<|eot_id|>\" }}\n {%- endif %}\n{%- endfor %}\n{%- if add_generation_prompt %}\n {{- '<|start_header_id|>assistant<|end_header_id|>\\n\\n' }}\n{%- endif %}\n", + "clean_up_tokenization_spaces": true, + "eos_token": "<|eot_id|>", + "model_input_names": [ + "input_ids", + "attention_mask" + ], + "model_max_length": 2048, + "pad_token": "<|eot_id|>", + "tokenizer_class": "PreTrainedTokenizerFast" +} diff --git a/checkpoint-320/trainer_state.json b/checkpoint-320/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..9775f9051b05bf05b1011bb8e630d79e75523b26 --- /dev/null +++ b/checkpoint-320/trainer_state.json @@ -0,0 +1,4833 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 0.6765774694416914, + "eval_steps": 500, + "global_step": 320, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.002114304592005286, + "grad_norm": 1.5018059015274048, + "learning_rate": 2.083333333333333e-08, + "logits/chosen": -0.3466828167438507, + "logits/rejected": -0.30099987983703613, + "logps/chosen": -0.9345186948776245, + "logps/rejected": -0.9117153882980347, + "loss": 1.4889, + "rewards/accuracies": 0.53125, + "rewards/chosen": -1.869037389755249, + "rewards/margins": -0.04560665041208267, + "rewards/rejected": -1.8234307765960693, + "step": 1 + }, + { + "epoch": 0.004228609184010572, + "grad_norm": 0.8093975186347961, + "learning_rate": 4.166666666666666e-08, + "logits/chosen": -0.4310421049594879, + "logits/rejected": -0.39132067561149597, + "logps/chosen": -0.8198825716972351, + "logps/rejected": -0.8644211888313293, + "loss": 1.376, + "rewards/accuracies": 0.515625, + "rewards/chosen": -1.6397651433944702, + "rewards/margins": 0.08907715976238251, + "rewards/rejected": -1.7288423776626587, + "step": 2 + }, + { + "epoch": 0.006342913776015857, + "grad_norm": 0.5377389788627625, + "learning_rate": 6.25e-08, + "logits/chosen": -0.46692028641700745, + "logits/rejected": -0.4649256467819214, + "logps/chosen": -0.9087910652160645, + "logps/rejected": -0.9648240804672241, + "loss": 1.3404, + "rewards/accuracies": 0.5703125, + "rewards/chosen": -1.817582130432129, + "rewards/margins": 0.11206617206335068, + "rewards/rejected": -1.9296481609344482, + "step": 3 + }, + { + "epoch": 0.008457218368021144, + "grad_norm": 0.3221875727176666, + "learning_rate": 8.333333333333333e-08, + "logits/chosen": -0.416828453540802, + "logits/rejected": -0.3584724962711334, + "logps/chosen": -0.7818898558616638, + "logps/rejected": -0.8170815110206604, + "loss": 1.3806, + "rewards/accuracies": 0.484375, + "rewards/chosen": -1.5637797117233276, + "rewards/margins": 0.07038339227437973, + "rewards/rejected": -1.6341630220413208, + "step": 4 + }, + { + "epoch": 0.010571522960026428, + "grad_norm": 0.64655601978302, + "learning_rate": 1.0416666666666667e-07, + "logits/chosen": -0.376886248588562, + "logits/rejected": -0.3516141474246979, + "logps/chosen": -0.8814125061035156, + "logps/rejected": -1.0214396715164185, + "loss": 1.2741, + "rewards/accuracies": 0.625, + "rewards/chosen": -1.7628250122070312, + "rewards/margins": 0.28005433082580566, + "rewards/rejected": -2.042879343032837, + "step": 5 + }, + { + "epoch": 0.012685827552031714, + "grad_norm": 0.4775894582271576, + "learning_rate": 1.25e-07, + "logits/chosen": -0.4757865369319916, + "logits/rejected": -0.4498941898345947, + "logps/chosen": -0.8962199687957764, + "logps/rejected": -0.9462199807167053, + "loss": 1.364, + "rewards/accuracies": 0.5625, + "rewards/chosen": -1.7924399375915527, + "rewards/margins": 0.10000008344650269, + "rewards/rejected": -1.8924399614334106, + "step": 6 + }, + { + "epoch": 0.014800132144037, + "grad_norm": 1.2459568977355957, + "learning_rate": 1.4583333333333335e-07, + "logits/chosen": -0.38895344734191895, + "logits/rejected": -0.38165366649627686, + "logps/chosen": -0.9025766253471375, + "logps/rejected": -0.9465017318725586, + "loss": 1.3898, + "rewards/accuracies": 0.59375, + "rewards/chosen": -1.805153250694275, + "rewards/margins": 0.0878501906991005, + "rewards/rejected": -1.8930034637451172, + "step": 7 + }, + { + "epoch": 0.016914436736042288, + "grad_norm": 0.6195729374885559, + "learning_rate": 1.6666666666666665e-07, + "logits/chosen": -0.3964853286743164, + "logits/rejected": -0.377862811088562, + "logps/chosen": -0.9054160118103027, + "logps/rejected": -0.9605879187583923, + "loss": 1.3821, + "rewards/accuracies": 0.4765625, + "rewards/chosen": -1.8108320236206055, + "rewards/margins": 0.1103438138961792, + "rewards/rejected": -1.9211758375167847, + "step": 8 + }, + { + "epoch": 0.019028741328047574, + "grad_norm": 1.2074137926101685, + "learning_rate": 1.875e-07, + "logits/chosen": -0.3729037344455719, + "logits/rejected": -0.38143450021743774, + "logps/chosen": -0.9328653216362, + "logps/rejected": -0.9905799627304077, + "loss": 1.3754, + "rewards/accuracies": 0.5078125, + "rewards/chosen": -1.8657306432724, + "rewards/margins": 0.11542946100234985, + "rewards/rejected": -1.9811599254608154, + "step": 9 + }, + { + "epoch": 0.021143045920052856, + "grad_norm": 0.2867220640182495, + "learning_rate": 2.0833333333333333e-07, + "logits/chosen": -0.4263336658477783, + "logits/rejected": -0.42903271317481995, + "logps/chosen": -0.8979260325431824, + "logps/rejected": -0.9078099727630615, + "loss": 1.4438, + "rewards/accuracies": 0.515625, + "rewards/chosen": -1.7958520650863647, + "rewards/margins": 0.019767940044403076, + "rewards/rejected": -1.815619945526123, + "step": 10 + }, + { + "epoch": 0.023257350512058142, + "grad_norm": 0.8363026976585388, + "learning_rate": 2.2916666666666663e-07, + "logits/chosen": -0.3374914526939392, + "logits/rejected": -0.32399696111679077, + "logps/chosen": -0.8886098861694336, + "logps/rejected": -0.9484556317329407, + "loss": 1.3422, + "rewards/accuracies": 0.546875, + "rewards/chosen": -1.7772197723388672, + "rewards/margins": 0.11969132721424103, + "rewards/rejected": -1.8969112634658813, + "step": 11 + }, + { + "epoch": 0.025371655104063428, + "grad_norm": 0.5406804084777832, + "learning_rate": 2.5e-07, + "logits/chosen": -0.42844679951667786, + "logits/rejected": -0.37984615564346313, + "logps/chosen": -0.861629843711853, + "logps/rejected": -0.8968492150306702, + "loss": 1.3922, + "rewards/accuracies": 0.5, + "rewards/chosen": -1.723259687423706, + "rewards/margins": 0.07043875753879547, + "rewards/rejected": -1.7936984300613403, + "step": 12 + }, + { + "epoch": 0.027485959696068714, + "grad_norm": 0.9919329285621643, + "learning_rate": 2.708333333333333e-07, + "logits/chosen": -0.36495402455329895, + "logits/rejected": -0.3249490261077881, + "logps/chosen": -0.8502095937728882, + "logps/rejected": -0.8470643758773804, + "loss": 1.4334, + "rewards/accuracies": 0.5078125, + "rewards/chosen": -1.7004191875457764, + "rewards/margins": -0.006290358491241932, + "rewards/rejected": -1.6941287517547607, + "step": 13 + }, + { + "epoch": 0.029600264288074, + "grad_norm": 0.5477162003517151, + "learning_rate": 2.916666666666667e-07, + "logits/chosen": -0.4155704081058502, + "logits/rejected": -0.39535820484161377, + "logps/chosen": -1.0430240631103516, + "logps/rejected": -1.1318373680114746, + "loss": 1.3533, + "rewards/accuracies": 0.6015625, + "rewards/chosen": -2.086048126220703, + "rewards/margins": 0.17762640118598938, + "rewards/rejected": -2.263674736022949, + "step": 14 + }, + { + "epoch": 0.031714568880079286, + "grad_norm": 0.26530712842941284, + "learning_rate": 3.1249999999999997e-07, + "logits/chosen": -0.4810572564601898, + "logits/rejected": -0.42454615235328674, + "logps/chosen": -0.8741041421890259, + "logps/rejected": -0.9494178295135498, + "loss": 1.3655, + "rewards/accuracies": 0.5, + "rewards/chosen": -1.7482082843780518, + "rewards/margins": 0.15062758326530457, + "rewards/rejected": -1.8988356590270996, + "step": 15 + }, + { + "epoch": 0.033828873472084575, + "grad_norm": 0.9272629618644714, + "learning_rate": 3.333333333333333e-07, + "logits/chosen": -0.4440098702907562, + "logits/rejected": -0.3930297791957855, + "logps/chosen": -0.8473359942436218, + "logps/rejected": -0.9369213581085205, + "loss": 1.3248, + "rewards/accuracies": 0.5625, + "rewards/chosen": -1.6946719884872437, + "rewards/margins": 0.17917080223560333, + "rewards/rejected": -1.873842716217041, + "step": 16 + }, + { + "epoch": 0.03594317806408986, + "grad_norm": 0.5912418961524963, + "learning_rate": 3.541666666666667e-07, + "logits/chosen": -0.3838099539279938, + "logits/rejected": -0.3507584035396576, + "logps/chosen": -0.8888350129127502, + "logps/rejected": -0.9361770749092102, + "loss": 1.383, + "rewards/accuracies": 0.4921875, + "rewards/chosen": -1.7776700258255005, + "rewards/margins": 0.0946839451789856, + "rewards/rejected": -1.8723541498184204, + "step": 17 + }, + { + "epoch": 0.03805748265609515, + "grad_norm": 0.6536504030227661, + "learning_rate": 3.75e-07, + "logits/chosen": -0.3581697940826416, + "logits/rejected": -0.3620460629463196, + "logps/chosen": -0.8519617319107056, + "logps/rejected": -0.9022184610366821, + "loss": 1.3841, + "rewards/accuracies": 0.5078125, + "rewards/chosen": -1.7039234638214111, + "rewards/margins": 0.10051343590021133, + "rewards/rejected": -1.8044369220733643, + "step": 18 + }, + { + "epoch": 0.04017178724810043, + "grad_norm": 0.3433632552623749, + "learning_rate": 3.958333333333333e-07, + "logits/chosen": -0.37887442111968994, + "logits/rejected": -0.37543320655822754, + "logps/chosen": -0.9464104175567627, + "logps/rejected": -1.0017329454421997, + "loss": 1.3649, + "rewards/accuracies": 0.5703125, + "rewards/chosen": -1.8928208351135254, + "rewards/margins": 0.11064518243074417, + "rewards/rejected": -2.0034658908843994, + "step": 19 + }, + { + "epoch": 0.04228609184010571, + "grad_norm": 0.9764007329940796, + "learning_rate": 4.1666666666666667e-07, + "logits/chosen": -0.44110679626464844, + "logits/rejected": -0.4280649721622467, + "logps/chosen": -0.9046768546104431, + "logps/rejected": -1.0464633703231812, + "loss": 1.2592, + "rewards/accuracies": 0.625, + "rewards/chosen": -1.8093537092208862, + "rewards/margins": 0.2835729420185089, + "rewards/rejected": -2.0929267406463623, + "step": 20 + }, + { + "epoch": 0.044400396432111, + "grad_norm": 1.8563830852508545, + "learning_rate": 4.375e-07, + "logits/chosen": -0.45183491706848145, + "logits/rejected": -0.42935287952423096, + "logps/chosen": -0.9043138027191162, + "logps/rejected": -0.9462392926216125, + "loss": 1.3784, + "rewards/accuracies": 0.5625, + "rewards/chosen": -1.8086276054382324, + "rewards/margins": 0.08385094255208969, + "rewards/rejected": -1.892478585243225, + "step": 21 + }, + { + "epoch": 0.046514701024116284, + "grad_norm": 1.3473299741744995, + "learning_rate": 4.5833333333333327e-07, + "logits/chosen": -0.37855517864227295, + "logits/rejected": -0.34429043531417847, + "logps/chosen": -0.9284683465957642, + "logps/rejected": -0.9454050064086914, + "loss": 1.4346, + "rewards/accuracies": 0.5234375, + "rewards/chosen": -1.8569366931915283, + "rewards/margins": 0.03387312963604927, + "rewards/rejected": -1.8908100128173828, + "step": 22 + }, + { + "epoch": 0.04862900561612157, + "grad_norm": 0.940831184387207, + "learning_rate": 4.791666666666667e-07, + "logits/chosen": -0.39172160625457764, + "logits/rejected": -0.3695780634880066, + "logps/chosen": -0.9314202666282654, + "logps/rejected": -1.020229697227478, + "loss": 1.3322, + "rewards/accuracies": 0.5703125, + "rewards/chosen": -1.8628405332565308, + "rewards/margins": 0.17761869728565216, + "rewards/rejected": -2.040459394454956, + "step": 23 + }, + { + "epoch": 0.050743310208126856, + "grad_norm": 0.5783158540725708, + "learning_rate": 5e-07, + "logits/chosen": -0.4958629608154297, + "logits/rejected": -0.4257377088069916, + "logps/chosen": -0.9379237294197083, + "logps/rejected": -0.9415461421012878, + "loss": 1.441, + "rewards/accuracies": 0.515625, + "rewards/chosen": -1.8758474588394165, + "rewards/margins": 0.0072449808940291405, + "rewards/rejected": -1.8830922842025757, + "step": 24 + }, + { + "epoch": 0.052857614800132145, + "grad_norm": 1.4209853410720825, + "learning_rate": 5.208333333333334e-07, + "logits/chosen": -0.36407172679901123, + "logits/rejected": -0.3331725299358368, + "logps/chosen": -0.9192589521408081, + "logps/rejected": -0.9595308899879456, + "loss": 1.3994, + "rewards/accuracies": 0.5078125, + "rewards/chosen": -1.8385179042816162, + "rewards/margins": 0.080544114112854, + "rewards/rejected": -1.9190617799758911, + "step": 25 + }, + { + "epoch": 0.05497191939213743, + "grad_norm": 0.6310216188430786, + "learning_rate": 5.416666666666666e-07, + "logits/chosen": -0.41772690415382385, + "logits/rejected": -0.36565953493118286, + "logps/chosen": -0.8052878379821777, + "logps/rejected": -0.8673746585845947, + "loss": 1.3356, + "rewards/accuracies": 0.5859375, + "rewards/chosen": -1.6105756759643555, + "rewards/margins": 0.12417369335889816, + "rewards/rejected": -1.7347493171691895, + "step": 26 + }, + { + "epoch": 0.05708622398414272, + "grad_norm": 1.2933462858200073, + "learning_rate": 5.625e-07, + "logits/chosen": -0.4482795000076294, + "logits/rejected": -0.39409321546554565, + "logps/chosen": -0.8339261412620544, + "logps/rejected": -0.8675202131271362, + "loss": 1.3739, + "rewards/accuracies": 0.4921875, + "rewards/chosen": -1.6678522825241089, + "rewards/margins": 0.06718815863132477, + "rewards/rejected": -1.7350404262542725, + "step": 27 + }, + { + "epoch": 0.059200528576148, + "grad_norm": 0.5808025002479553, + "learning_rate": 5.833333333333334e-07, + "logits/chosen": -0.37116044759750366, + "logits/rejected": -0.3478051722049713, + "logps/chosen": -0.8950318694114685, + "logps/rejected": -0.9756672978401184, + "loss": 1.3505, + "rewards/accuracies": 0.5625, + "rewards/chosen": -1.790063738822937, + "rewards/margins": 0.1612708568572998, + "rewards/rejected": -1.9513345956802368, + "step": 28 + }, + { + "epoch": 0.06131483316815329, + "grad_norm": 1.0569533109664917, + "learning_rate": 6.041666666666666e-07, + "logits/chosen": -0.421148419380188, + "logits/rejected": -0.38443076610565186, + "logps/chosen": -0.8021283745765686, + "logps/rejected": -0.8370179533958435, + "loss": 1.3916, + "rewards/accuracies": 0.46875, + "rewards/chosen": -1.6042567491531372, + "rewards/margins": 0.06977920234203339, + "rewards/rejected": -1.674035906791687, + "step": 29 + }, + { + "epoch": 0.06342913776015857, + "grad_norm": 0.42577147483825684, + "learning_rate": 6.249999999999999e-07, + "logits/chosen": -0.4429818391799927, + "logits/rejected": -0.3524704575538635, + "logps/chosen": -0.8916822671890259, + "logps/rejected": -0.8985542058944702, + "loss": 1.4321, + "rewards/accuracies": 0.5234375, + "rewards/chosen": -1.7833645343780518, + "rewards/margins": 0.01374388113617897, + "rewards/rejected": -1.7971084117889404, + "step": 30 + }, + { + "epoch": 0.06554344235216386, + "grad_norm": 1.0056904554367065, + "learning_rate": 6.458333333333333e-07, + "logits/chosen": -0.376451700925827, + "logits/rejected": -0.342519074678421, + "logps/chosen": -0.9038617014884949, + "logps/rejected": -0.953092634677887, + "loss": 1.398, + "rewards/accuracies": 0.4765625, + "rewards/chosen": -1.8077234029769897, + "rewards/margins": 0.09846188127994537, + "rewards/rejected": -1.906185269355774, + "step": 31 + }, + { + "epoch": 0.06765774694416915, + "grad_norm": 0.5494012236595154, + "learning_rate": 6.666666666666666e-07, + "logits/chosen": -0.3459138870239258, + "logits/rejected": -0.3590989410877228, + "logps/chosen": -0.8274999260902405, + "logps/rejected": -0.8776509761810303, + "loss": 1.363, + "rewards/accuracies": 0.5625, + "rewards/chosen": -1.654999852180481, + "rewards/margins": 0.1003020703792572, + "rewards/rejected": -1.7553019523620605, + "step": 32 + }, + { + "epoch": 0.06977205153617443, + "grad_norm": 0.693267822265625, + "learning_rate": 6.875e-07, + "logits/chosen": -0.40053680539131165, + "logits/rejected": -0.37323904037475586, + "logps/chosen": -0.8255244493484497, + "logps/rejected": -0.8658804893493652, + "loss": 1.3712, + "rewards/accuracies": 0.578125, + "rewards/chosen": -1.6510488986968994, + "rewards/margins": 0.08071210980415344, + "rewards/rejected": -1.7317609786987305, + "step": 33 + }, + { + "epoch": 0.07188635612817972, + "grad_norm": 2.213238000869751, + "learning_rate": 7.083333333333334e-07, + "logits/chosen": -0.40097948908805847, + "logits/rejected": -0.38190510869026184, + "logps/chosen": -0.9122671484947205, + "logps/rejected": -0.9549552798271179, + "loss": 1.36, + "rewards/accuracies": 0.5390625, + "rewards/chosen": -1.824534296989441, + "rewards/margins": 0.0853763073682785, + "rewards/rejected": -1.9099105596542358, + "step": 34 + }, + { + "epoch": 0.074000660720185, + "grad_norm": 0.6859830021858215, + "learning_rate": 7.291666666666666e-07, + "logits/chosen": -0.42501094937324524, + "logits/rejected": -0.42549416422843933, + "logps/chosen": -1.0008373260498047, + "logps/rejected": -1.1157118082046509, + "loss": 1.3294, + "rewards/accuracies": 0.5546875, + "rewards/chosen": -2.0016746520996094, + "rewards/margins": 0.22974897921085358, + "rewards/rejected": -2.2314236164093018, + "step": 35 + }, + { + "epoch": 0.0761149653121903, + "grad_norm": 0.6468721628189087, + "learning_rate": 7.5e-07, + "logits/chosen": -0.36494994163513184, + "logits/rejected": -0.30433908104896545, + "logps/chosen": -0.9062094688415527, + "logps/rejected": -0.920263409614563, + "loss": 1.4312, + "rewards/accuracies": 0.4921875, + "rewards/chosen": -1.8124189376831055, + "rewards/margins": 0.02810765616595745, + "rewards/rejected": -1.840526819229126, + "step": 36 + }, + { + "epoch": 0.07822926990419557, + "grad_norm": 0.5085556507110596, + "learning_rate": 7.708333333333333e-07, + "logits/chosen": -0.4677881598472595, + "logits/rejected": -0.456132709980011, + "logps/chosen": -1.0101865530014038, + "logps/rejected": -1.0429682731628418, + "loss": 1.4132, + "rewards/accuracies": 0.5234375, + "rewards/chosen": -2.0203731060028076, + "rewards/margins": 0.06556359678506851, + "rewards/rejected": -2.0859365463256836, + "step": 37 + }, + { + "epoch": 0.08034357449620086, + "grad_norm": 0.23813335597515106, + "learning_rate": 7.916666666666666e-07, + "logits/chosen": -0.3991190791130066, + "logits/rejected": -0.3664044141769409, + "logps/chosen": -0.9578174352645874, + "logps/rejected": -0.9229263067245483, + "loss": 1.4824, + "rewards/accuracies": 0.4609375, + "rewards/chosen": -1.9156348705291748, + "rewards/margins": -0.06978224962949753, + "rewards/rejected": -1.8458526134490967, + "step": 38 + }, + { + "epoch": 0.08245787908820615, + "grad_norm": 0.587037980556488, + "learning_rate": 8.125e-07, + "logits/chosen": -0.37554049491882324, + "logits/rejected": -0.36305734515190125, + "logps/chosen": -0.8503091931343079, + "logps/rejected": -0.864615261554718, + "loss": 1.4086, + "rewards/accuracies": 0.4296875, + "rewards/chosen": -1.7006183862686157, + "rewards/margins": 0.028611989691853523, + "rewards/rejected": -1.729230523109436, + "step": 39 + }, + { + "epoch": 0.08457218368021142, + "grad_norm": 0.4172501862049103, + "learning_rate": 8.333333333333333e-07, + "logits/chosen": -0.4405443072319031, + "logits/rejected": -0.41723060607910156, + "logps/chosen": -0.8502858877182007, + "logps/rejected": -0.9114271402359009, + "loss": 1.3446, + "rewards/accuracies": 0.546875, + "rewards/chosen": -1.7005717754364014, + "rewards/margins": 0.12228240817785263, + "rewards/rejected": -1.8228542804718018, + "step": 40 + }, + { + "epoch": 0.08668648827221671, + "grad_norm": 0.9275372624397278, + "learning_rate": 8.541666666666666e-07, + "logits/chosen": -0.4200601577758789, + "logits/rejected": -0.3478623628616333, + "logps/chosen": -0.892408013343811, + "logps/rejected": -0.9276402592658997, + "loss": 1.3887, + "rewards/accuracies": 0.46875, + "rewards/chosen": -1.784816026687622, + "rewards/margins": 0.07046431303024292, + "rewards/rejected": -1.8552805185317993, + "step": 41 + }, + { + "epoch": 0.088800792864222, + "grad_norm": 0.7317383289337158, + "learning_rate": 8.75e-07, + "logits/chosen": -0.37675267457962036, + "logits/rejected": -0.33540332317352295, + "logps/chosen": -0.7866061925888062, + "logps/rejected": -0.824250340461731, + "loss": 1.3837, + "rewards/accuracies": 0.5078125, + "rewards/chosen": -1.5732123851776123, + "rewards/margins": 0.07528844475746155, + "rewards/rejected": -1.648500680923462, + "step": 42 + }, + { + "epoch": 0.09091509745622729, + "grad_norm": 0.9452736973762512, + "learning_rate": 8.958333333333334e-07, + "logits/chosen": -0.4662383198738098, + "logits/rejected": -0.4447881579399109, + "logps/chosen": -0.9490666389465332, + "logps/rejected": -1.0112388134002686, + "loss": 1.3412, + "rewards/accuracies": 0.5390625, + "rewards/chosen": -1.8981332778930664, + "rewards/margins": 0.12434446066617966, + "rewards/rejected": -2.022477626800537, + "step": 43 + }, + { + "epoch": 0.09302940204823257, + "grad_norm": 0.2848323881626129, + "learning_rate": 9.166666666666665e-07, + "logits/chosen": -0.41404005885124207, + "logits/rejected": -0.3944583535194397, + "logps/chosen": -0.8224930167198181, + "logps/rejected": -0.8416361808776855, + "loss": 1.4027, + "rewards/accuracies": 0.609375, + "rewards/chosen": -1.6449860334396362, + "rewards/margins": 0.038286346942186356, + "rewards/rejected": -1.683272361755371, + "step": 44 + }, + { + "epoch": 0.09514370664023786, + "grad_norm": 0.7165678143501282, + "learning_rate": 9.374999999999999e-07, + "logits/chosen": -0.40475326776504517, + "logits/rejected": -0.3559921383857727, + "logps/chosen": -0.8070214986801147, + "logps/rejected": -0.8993593454360962, + "loss": 1.3148, + "rewards/accuracies": 0.53125, + "rewards/chosen": -1.6140429973602295, + "rewards/margins": 0.18467575311660767, + "rewards/rejected": -1.7987186908721924, + "step": 45 + }, + { + "epoch": 0.09725801123224315, + "grad_norm": 0.4779021739959717, + "learning_rate": 9.583333333333334e-07, + "logits/chosen": -0.4171525835990906, + "logits/rejected": -0.42166149616241455, + "logps/chosen": -0.7872560024261475, + "logps/rejected": -0.8496187925338745, + "loss": 1.3356, + "rewards/accuracies": 0.5546875, + "rewards/chosen": -1.574512004852295, + "rewards/margins": 0.12472567707300186, + "rewards/rejected": -1.699237585067749, + "step": 46 + }, + { + "epoch": 0.09937231582424844, + "grad_norm": 0.7870219349861145, + "learning_rate": 9.791666666666667e-07, + "logits/chosen": -0.3734116554260254, + "logits/rejected": -0.32778748869895935, + "logps/chosen": -0.7842286825180054, + "logps/rejected": -0.8161548972129822, + "loss": 1.3647, + "rewards/accuracies": 0.5078125, + "rewards/chosen": -1.5684573650360107, + "rewards/margins": 0.06385258585214615, + "rewards/rejected": -1.6323097944259644, + "step": 47 + }, + { + "epoch": 0.10148662041625371, + "grad_norm": 0.2597256600856781, + "learning_rate": 1e-06, + "logits/chosen": -0.4355677664279938, + "logits/rejected": -0.38983187079429626, + "logps/chosen": -0.8787693977355957, + "logps/rejected": -0.9383041262626648, + "loss": 1.35, + "rewards/accuracies": 0.5546875, + "rewards/chosen": -1.7575387954711914, + "rewards/margins": 0.11906948685646057, + "rewards/rejected": -1.8766082525253296, + "step": 48 + }, + { + "epoch": 0.103600925008259, + "grad_norm": 0.9942799210548401, + "learning_rate": 9.999862751990697e-07, + "logits/chosen": -0.4244321882724762, + "logits/rejected": -0.4366786777973175, + "logps/chosen": -0.7910157442092896, + "logps/rejected": -0.8630884885787964, + "loss": 1.3166, + "rewards/accuracies": 0.5546875, + "rewards/chosen": -1.582031488418579, + "rewards/margins": 0.14414538443088531, + "rewards/rejected": -1.7261769771575928, + "step": 49 + }, + { + "epoch": 0.10571522960026429, + "grad_norm": 0.5333903431892395, + "learning_rate": 9.999451015497595e-07, + "logits/chosen": -0.389942467212677, + "logits/rejected": -0.36674585938453674, + "logps/chosen": -0.7312074899673462, + "logps/rejected": -0.7289648652076721, + "loss": 1.4225, + "rewards/accuracies": 0.4765625, + "rewards/chosen": -1.4624149799346924, + "rewards/margins": -0.004485193639993668, + "rewards/rejected": -1.4579297304153442, + "step": 50 + }, + { + "epoch": 0.10782953419226958, + "grad_norm": 0.5712242722511292, + "learning_rate": 9.9987648131247e-07, + "logits/chosen": -0.4622853994369507, + "logits/rejected": -0.3728552460670471, + "logps/chosen": -0.8764299750328064, + "logps/rejected": -0.869678795337677, + "loss": 1.4542, + "rewards/accuracies": 0.53125, + "rewards/chosen": -1.7528599500656128, + "rewards/margins": -0.013502337038516998, + "rewards/rejected": -1.739357590675354, + "step": 51 + }, + { + "epoch": 0.10994383878427486, + "grad_norm": 0.2586441934108734, + "learning_rate": 9.99780418254397e-07, + "logits/chosen": -0.37249019742012024, + "logits/rejected": -0.3998304605484009, + "logps/chosen": -0.8435611724853516, + "logps/rejected": -0.9359882473945618, + "loss": 1.3057, + "rewards/accuracies": 0.625, + "rewards/chosen": -1.6871223449707031, + "rewards/margins": 0.18485431373119354, + "rewards/rejected": -1.8719764947891235, + "step": 52 + }, + { + "epoch": 0.11205814337628014, + "grad_norm": 1.0829113721847534, + "learning_rate": 9.996569176493268e-07, + "logits/chosen": -0.47697725892066956, + "logits/rejected": -0.4208195209503174, + "logps/chosen": -0.8014968037605286, + "logps/rejected": -0.8703804612159729, + "loss": 1.3523, + "rewards/accuracies": 0.5703125, + "rewards/chosen": -1.6029936075210571, + "rewards/margins": 0.1377674788236618, + "rewards/rejected": -1.7407609224319458, + "step": 53 + }, + { + "epoch": 0.11417244796828543, + "grad_norm": 0.5523208379745483, + "learning_rate": 9.995059862773438e-07, + "logits/chosen": -0.40533363819122314, + "logits/rejected": -0.36801978945732117, + "logps/chosen": -0.7641825675964355, + "logps/rejected": -0.8168596029281616, + "loss": 1.3692, + "rewards/accuracies": 0.53125, + "rewards/chosen": -1.528365135192871, + "rewards/margins": 0.10535416752099991, + "rewards/rejected": -1.6337192058563232, + "step": 54 + }, + { + "epoch": 0.11628675256029072, + "grad_norm": 0.614101767539978, + "learning_rate": 9.993276324244605e-07, + "logits/chosen": -0.4476906955242157, + "logits/rejected": -0.40396648645401, + "logps/chosen": -0.8706808090209961, + "logps/rejected": -0.9221430420875549, + "loss": 1.3787, + "rewards/accuracies": 0.515625, + "rewards/chosen": -1.7413616180419922, + "rewards/margins": 0.10292442888021469, + "rewards/rejected": -1.8442860841751099, + "step": 55 + }, + { + "epoch": 0.118401057152296, + "grad_norm": 0.3428778052330017, + "learning_rate": 9.991218658821608e-07, + "logits/chosen": -0.31709593534469604, + "logits/rejected": -0.2760937213897705, + "logps/chosen": -0.842248797416687, + "logps/rejected": -0.8068034648895264, + "loss": 1.498, + "rewards/accuracies": 0.4375, + "rewards/chosen": -1.684497594833374, + "rewards/margins": -0.07089066505432129, + "rewards/rejected": -1.6136069297790527, + "step": 56 + }, + { + "epoch": 0.12051536174430129, + "grad_norm": 0.6877723932266235, + "learning_rate": 9.988886979468643e-07, + "logits/chosen": -0.41800016164779663, + "logits/rejected": -0.4011584222316742, + "logps/chosen": -0.7845420837402344, + "logps/rejected": -0.834447979927063, + "loss": 1.3491, + "rewards/accuracies": 0.546875, + "rewards/chosen": -1.5690841674804688, + "rewards/margins": 0.09981165081262589, + "rewards/rejected": -1.668895959854126, + "step": 57 + }, + { + "epoch": 0.12262966633630658, + "grad_norm": 0.9649701714515686, + "learning_rate": 9.98628141419305e-07, + "logits/chosen": -0.4253537058830261, + "logits/rejected": -0.4305458962917328, + "logps/chosen": -0.86476731300354, + "logps/rejected": -0.9080386161804199, + "loss": 1.3639, + "rewards/accuracies": 0.515625, + "rewards/chosen": -1.72953462600708, + "rewards/margins": 0.08654248714447021, + "rewards/rejected": -1.8160772323608398, + "step": 58 + }, + { + "epoch": 0.12474397092831185, + "grad_norm": 1.3779780864715576, + "learning_rate": 9.98340210603829e-07, + "logits/chosen": -0.39970022439956665, + "logits/rejected": -0.441428005695343, + "logps/chosen": -0.8662775158882141, + "logps/rejected": -0.9646260738372803, + "loss": 1.3001, + "rewards/accuracies": 0.6484375, + "rewards/chosen": -1.7325550317764282, + "rewards/margins": 0.19669723510742188, + "rewards/rejected": -1.9292521476745605, + "step": 59 + }, + { + "epoch": 0.12685827552031714, + "grad_norm": 0.5366966724395752, + "learning_rate": 9.980249213076084e-07, + "logits/chosen": -0.37770116329193115, + "logits/rejected": -0.35231757164001465, + "logps/chosen": -0.8165755867958069, + "logps/rejected": -0.8619179129600525, + "loss": 1.3699, + "rewards/accuracies": 0.578125, + "rewards/chosen": -1.6331511735916138, + "rewards/margins": 0.09068439900875092, + "rewards/rejected": -1.723835825920105, + "step": 60 + }, + { + "epoch": 0.12897258011232243, + "grad_norm": 0.36810922622680664, + "learning_rate": 9.976822908397748e-07, + "logits/chosen": -0.4224976897239685, + "logits/rejected": -0.41758257150650024, + "logps/chosen": -0.8445641994476318, + "logps/rejected": -0.9393664598464966, + "loss": 1.3193, + "rewards/accuracies": 0.5390625, + "rewards/chosen": -1.6891283988952637, + "rewards/margins": 0.18960458040237427, + "rewards/rejected": -1.8787329196929932, + "step": 61 + }, + { + "epoch": 0.13108688470432772, + "grad_norm": 0.6838279366493225, + "learning_rate": 9.97312338010468e-07, + "logits/chosen": -0.4168627858161926, + "logits/rejected": -0.36115381121635437, + "logps/chosen": -0.8370552659034729, + "logps/rejected": -0.8352169394493103, + "loss": 1.4284, + "rewards/accuracies": 0.5703125, + "rewards/chosen": -1.6741105318069458, + "rewards/margins": -0.0036766715347766876, + "rewards/rejected": -1.6704338788986206, + "step": 62 + }, + { + "epoch": 0.133201189296333, + "grad_norm": 0.39330533146858215, + "learning_rate": 9.969150831298037e-07, + "logits/chosen": -0.4558233618736267, + "logits/rejected": -0.4025765061378479, + "logps/chosen": -0.826255738735199, + "logps/rejected": -0.894213080406189, + "loss": 1.3485, + "rewards/accuracies": 0.5, + "rewards/chosen": -1.652511477470398, + "rewards/margins": 0.13591471314430237, + "rewards/rejected": -1.788426160812378, + "step": 63 + }, + { + "epoch": 0.1353154938883383, + "grad_norm": 0.6055929660797119, + "learning_rate": 9.964905480067584e-07, + "logits/chosen": -0.459463506937027, + "logits/rejected": -0.42943331599235535, + "logps/chosen": -0.7901928424835205, + "logps/rejected": -0.7964221239089966, + "loss": 1.4057, + "rewards/accuracies": 0.484375, + "rewards/chosen": -1.580385684967041, + "rewards/margins": 0.012458762153983116, + "rewards/rejected": -1.5928442478179932, + "step": 64 + }, + { + "epoch": 0.13742979848034356, + "grad_norm": 0.37883859872817993, + "learning_rate": 9.960387559479725e-07, + "logits/chosen": -0.4447207450866699, + "logits/rejected": -0.371269553899765, + "logps/chosen": -0.7863065004348755, + "logps/rejected": -0.7983666658401489, + "loss": 1.4202, + "rewards/accuracies": 0.4609375, + "rewards/chosen": -1.572613000869751, + "rewards/margins": 0.02412020042538643, + "rewards/rejected": -1.5967333316802979, + "step": 65 + }, + { + "epoch": 0.13954410307234885, + "grad_norm": 0.31330156326293945, + "learning_rate": 9.955597317564703e-07, + "logits/chosen": -0.42059677839279175, + "logits/rejected": -0.37605100870132446, + "logps/chosen": -0.7669360637664795, + "logps/rejected": -0.8348797559738159, + "loss": 1.3368, + "rewards/accuracies": 0.5390625, + "rewards/chosen": -1.533872127532959, + "rewards/margins": 0.13588732481002808, + "rewards/rejected": -1.6697595119476318, + "step": 66 + }, + { + "epoch": 0.14165840766435414, + "grad_norm": 0.4353170096874237, + "learning_rate": 9.950535017302983e-07, + "logits/chosen": -0.3897082805633545, + "logits/rejected": -0.38229796290397644, + "logps/chosen": -0.7249190807342529, + "logps/rejected": -0.7696882486343384, + "loss": 1.3511, + "rewards/accuracies": 0.5546875, + "rewards/chosen": -1.4498381614685059, + "rewards/margins": 0.08953814208507538, + "rewards/rejected": -1.5393764972686768, + "step": 67 + }, + { + "epoch": 0.14377271225635943, + "grad_norm": 0.6724106669425964, + "learning_rate": 9.94520093661082e-07, + "logits/chosen": -0.3687596023082733, + "logits/rejected": -0.34222811460494995, + "logps/chosen": -0.7845972776412964, + "logps/rejected": -0.8308086395263672, + "loss": 1.3737, + "rewards/accuracies": 0.5, + "rewards/chosen": -1.5691945552825928, + "rewards/margins": 0.09242270141839981, + "rewards/rejected": -1.6616172790527344, + "step": 68 + }, + { + "epoch": 0.14588701684836472, + "grad_norm": 0.7312172651290894, + "learning_rate": 9.939595368324994e-07, + "logits/chosen": -0.4475817382335663, + "logits/rejected": -0.3975730538368225, + "logps/chosen": -0.7314785718917847, + "logps/rejected": -0.7924487590789795, + "loss": 1.3439, + "rewards/accuracies": 0.5390625, + "rewards/chosen": -1.4629571437835693, + "rewards/margins": 0.12194043397903442, + "rewards/rejected": -1.584897518157959, + "step": 69 + }, + { + "epoch": 0.14800132144037, + "grad_norm": 0.301097571849823, + "learning_rate": 9.933718620186744e-07, + "logits/chosen": -0.402032732963562, + "logits/rejected": -0.3640722632408142, + "logps/chosen": -0.7727882862091064, + "logps/rejected": -0.8291516304016113, + "loss": 1.358, + "rewards/accuracies": 0.5546875, + "rewards/chosen": -1.545576572418213, + "rewards/margins": 0.11272668838500977, + "rewards/rejected": -1.6583032608032227, + "step": 70 + }, + { + "epoch": 0.1501156260323753, + "grad_norm": 0.377835750579834, + "learning_rate": 9.92757101482486e-07, + "logits/chosen": -0.316825270652771, + "logits/rejected": -0.3245603144168854, + "logps/chosen": -0.7962774634361267, + "logps/rejected": -0.8610175848007202, + "loss": 1.3464, + "rewards/accuracies": 0.5859375, + "rewards/chosen": -1.5925549268722534, + "rewards/margins": 0.12948019802570343, + "rewards/rejected": -1.7220351696014404, + "step": 71 + }, + { + "epoch": 0.1522299306243806, + "grad_norm": 0.84058678150177, + "learning_rate": 9.921152889737984e-07, + "logits/chosen": -0.4446060359477997, + "logits/rejected": -0.43160340189933777, + "logps/chosen": -0.7745426297187805, + "logps/rejected": -0.8286185264587402, + "loss": 1.3408, + "rewards/accuracies": 0.5703125, + "rewards/chosen": -1.549085259437561, + "rewards/margins": 0.10815180093050003, + "rewards/rejected": -1.6572370529174805, + "step": 72 + }, + { + "epoch": 0.15434423521638585, + "grad_norm": 0.6970808506011963, + "learning_rate": 9.91446459727607e-07, + "logits/chosen": -0.4220297634601593, + "logits/rejected": -0.404453307390213, + "logps/chosen": -0.8769615888595581, + "logps/rejected": -0.9564313292503357, + "loss": 1.3423, + "rewards/accuracies": 0.6328125, + "rewards/chosen": -1.7539231777191162, + "rewards/margins": 0.15893957018852234, + "rewards/rejected": -1.9128626585006714, + "step": 73 + }, + { + "epoch": 0.15645853980839114, + "grad_norm": 0.5900676250457764, + "learning_rate": 9.90750650462105e-07, + "logits/chosen": -0.41884100437164307, + "logits/rejected": -0.38551777601242065, + "logps/chosen": -0.814996063709259, + "logps/rejected": -0.8892688751220703, + "loss": 1.3325, + "rewards/accuracies": 0.5546875, + "rewards/chosen": -1.629992127418518, + "rewards/margins": 0.14854571223258972, + "rewards/rejected": -1.7785377502441406, + "step": 74 + }, + { + "epoch": 0.15857284440039643, + "grad_norm": 0.7245749831199646, + "learning_rate": 9.900278993766668e-07, + "logits/chosen": -0.3451727330684662, + "logits/rejected": -0.3348972201347351, + "logps/chosen": -0.8788102865219116, + "logps/rejected": -0.9273182153701782, + "loss": 1.377, + "rewards/accuracies": 0.5078125, + "rewards/chosen": -1.7576205730438232, + "rewards/margins": 0.09701582789421082, + "rewards/rejected": -1.8546364307403564, + "step": 75 + }, + { + "epoch": 0.16068714899240172, + "grad_norm": 0.3135383725166321, + "learning_rate": 9.89278246149752e-07, + "logits/chosen": -0.4140404760837555, + "logits/rejected": -0.38082340359687805, + "logps/chosen": -0.7513999342918396, + "logps/rejected": -0.7880118489265442, + "loss": 1.3921, + "rewards/accuracies": 0.53125, + "rewards/chosen": -1.5027998685836792, + "rewards/margins": 0.07322371751070023, + "rewards/rejected": -1.5760236978530884, + "step": 76 + }, + { + "epoch": 0.162801453584407, + "grad_norm": 0.8594076633453369, + "learning_rate": 9.885017319367252e-07, + "logits/chosen": -0.35951656103134155, + "logits/rejected": -0.30456626415252686, + "logps/chosen": -0.7989844679832458, + "logps/rejected": -0.8145395517349243, + "loss": 1.4061, + "rewards/accuracies": 0.4765625, + "rewards/chosen": -1.5979689359664917, + "rewards/margins": 0.031110182404518127, + "rewards/rejected": -1.6290791034698486, + "step": 77 + }, + { + "epoch": 0.1649157581764123, + "grad_norm": 0.4393538534641266, + "learning_rate": 9.876983993675989e-07, + "logits/chosen": -0.33119240403175354, + "logits/rejected": -0.31970253586769104, + "logps/chosen": -0.721772313117981, + "logps/rejected": -0.7868390679359436, + "loss": 1.3325, + "rewards/accuracies": 0.5390625, + "rewards/chosen": -1.443544626235962, + "rewards/margins": 0.13013358414173126, + "rewards/rejected": -1.5736781358718872, + "step": 78 + }, + { + "epoch": 0.1670300627684176, + "grad_norm": 0.8017925024032593, + "learning_rate": 9.868682925446909e-07, + "logits/chosen": -0.3608989417552948, + "logits/rejected": -0.3497124910354614, + "logps/chosen": -0.8117240071296692, + "logps/rejected": -0.8506529331207275, + "loss": 1.3671, + "rewards/accuracies": 0.53125, + "rewards/chosen": -1.6234480142593384, + "rewards/margins": 0.07785768806934357, + "rewards/rejected": -1.701305866241455, + "step": 79 + }, + { + "epoch": 0.16914436736042285, + "grad_norm": 0.5704994201660156, + "learning_rate": 9.860114570402054e-07, + "logits/chosen": -0.37441548705101013, + "logits/rejected": -0.32188406586647034, + "logps/chosen": -0.745419442653656, + "logps/rejected": -0.816170871257782, + "loss": 1.3315, + "rewards/accuracies": 0.484375, + "rewards/chosen": -1.490838885307312, + "rewards/margins": 0.1415030062198639, + "rewards/rejected": -1.632341742515564, + "step": 80 + }, + { + "epoch": 0.17125867195242814, + "grad_norm": 3.0005106925964355, + "learning_rate": 9.85127939893729e-07, + "logits/chosen": -0.34791454672813416, + "logits/rejected": -0.32542383670806885, + "logps/chosen": -0.7547991275787354, + "logps/rejected": -0.7868378162384033, + "loss": 1.3742, + "rewards/accuracies": 0.515625, + "rewards/chosen": -1.5095982551574707, + "rewards/margins": 0.06407731771469116, + "rewards/rejected": -1.5736756324768066, + "step": 81 + }, + { + "epoch": 0.17337297654443343, + "grad_norm": 0.5891271829605103, + "learning_rate": 9.842177896096493e-07, + "logits/chosen": -0.38649702072143555, + "logits/rejected": -0.36892226338386536, + "logps/chosen": -0.7556143999099731, + "logps/rejected": -0.82858806848526, + "loss": 1.3233, + "rewards/accuracies": 0.5703125, + "rewards/chosen": -1.5112287998199463, + "rewards/margins": 0.14594702422618866, + "rewards/rejected": -1.65717613697052, + "step": 82 + }, + { + "epoch": 0.17548728113643872, + "grad_norm": 0.27861157059669495, + "learning_rate": 9.832810561544923e-07, + "logits/chosen": -0.38264670968055725, + "logits/rejected": -0.35908499360084534, + "logps/chosen": -0.7858557105064392, + "logps/rejected": -0.8571599721908569, + "loss": 1.3234, + "rewards/accuracies": 0.578125, + "rewards/chosen": -1.5717114210128784, + "rewards/margins": 0.1426086127758026, + "rewards/rejected": -1.7143199443817139, + "step": 83 + }, + { + "epoch": 0.177601585728444, + "grad_norm": 0.8158763647079468, + "learning_rate": 9.823177909541793e-07, + "logits/chosen": -0.4076104760169983, + "logits/rejected": -0.3934200704097748, + "logps/chosen": -0.8089872002601624, + "logps/rejected": -0.8885407447814941, + "loss": 1.3476, + "rewards/accuracies": 0.5390625, + "rewards/chosen": -1.6179744005203247, + "rewards/margins": 0.15910708904266357, + "rewards/rejected": -1.7770814895629883, + "step": 84 + }, + { + "epoch": 0.1797158903204493, + "grad_norm": 0.46008333563804626, + "learning_rate": 9.813280468912022e-07, + "logits/chosen": -0.33124151825904846, + "logits/rejected": -0.34535717964172363, + "logps/chosen": -0.733020544052124, + "logps/rejected": -0.8716557621955872, + "loss": 1.2807, + "rewards/accuracies": 0.5859375, + "rewards/chosen": -1.466041088104248, + "rewards/margins": 0.27727028727531433, + "rewards/rejected": -1.7433115243911743, + "step": 85 + }, + { + "epoch": 0.18183019491245458, + "grad_norm": 0.3784334659576416, + "learning_rate": 9.80311878301722e-07, + "logits/chosen": -0.40713849663734436, + "logits/rejected": -0.3808574080467224, + "logps/chosen": -0.7063947319984436, + "logps/rejected": -0.7589148879051208, + "loss": 1.3501, + "rewards/accuracies": 0.515625, + "rewards/chosen": -1.4127894639968872, + "rewards/margins": 0.10504024475812912, + "rewards/rejected": -1.5178297758102417, + "step": 86 + }, + { + "epoch": 0.18394449950445987, + "grad_norm": 0.6137813329696655, + "learning_rate": 9.792693409725853e-07, + "logits/chosen": -0.4119255542755127, + "logits/rejected": -0.44221603870391846, + "logps/chosen": -0.795850932598114, + "logps/rejected": -0.8925026059150696, + "loss": 1.2987, + "rewards/accuracies": 0.5859375, + "rewards/chosen": -1.591701865196228, + "rewards/margins": 0.19330324232578278, + "rewards/rejected": -1.7850052118301392, + "step": 87 + }, + { + "epoch": 0.18605880409646514, + "grad_norm": 0.5354250073432922, + "learning_rate": 9.78200492138261e-07, + "logits/chosen": -0.3792279064655304, + "logits/rejected": -0.3789527714252472, + "logps/chosen": -0.7249161005020142, + "logps/rejected": -0.8088154792785645, + "loss": 1.3304, + "rewards/accuracies": 0.515625, + "rewards/chosen": -1.4498322010040283, + "rewards/margins": 0.16779886186122894, + "rewards/rejected": -1.617630958557129, + "step": 88 + }, + { + "epoch": 0.18817310868847043, + "grad_norm": 0.6911243796348572, + "learning_rate": 9.771053904776995e-07, + "logits/chosen": -0.38837429881095886, + "logits/rejected": -0.36597418785095215, + "logps/chosen": -0.7528612017631531, + "logps/rejected": -0.7981135249137878, + "loss": 1.3481, + "rewards/accuracies": 0.6171875, + "rewards/chosen": -1.5057224035263062, + "rewards/margins": 0.09050464630126953, + "rewards/rejected": -1.5962270498275757, + "step": 89 + }, + { + "epoch": 0.19028741328047571, + "grad_norm": 0.37110790610313416, + "learning_rate": 9.759840961111097e-07, + "logits/chosen": -0.3804919421672821, + "logits/rejected": -0.38750600814819336, + "logps/chosen": -0.8673248291015625, + "logps/rejected": -0.9381619691848755, + "loss": 1.3303, + "rewards/accuracies": 0.5859375, + "rewards/chosen": -1.734649658203125, + "rewards/margins": 0.14167429506778717, + "rewards/rejected": -1.876323938369751, + "step": 90 + }, + { + "epoch": 0.192401717872481, + "grad_norm": 0.8033086657524109, + "learning_rate": 9.748366705966593e-07, + "logits/chosen": -0.3804866075515747, + "logits/rejected": -0.31055447459220886, + "logps/chosen": -0.7535511255264282, + "logps/rejected": -0.7824290990829468, + "loss": 1.3706, + "rewards/accuracies": 0.546875, + "rewards/chosen": -1.5071022510528564, + "rewards/margins": 0.057755980640649796, + "rewards/rejected": -1.5648581981658936, + "step": 91 + }, + { + "epoch": 0.1945160224644863, + "grad_norm": 0.6934167742729187, + "learning_rate": 9.736631769270957e-07, + "logits/chosen": -0.443461149930954, + "logits/rejected": -0.4398806691169739, + "logps/chosen": -0.8123858571052551, + "logps/rejected": -0.8972252607345581, + "loss": 1.3464, + "rewards/accuracies": 0.578125, + "rewards/chosen": -1.6247717142105103, + "rewards/margins": 0.16967862844467163, + "rewards/rejected": -1.7944505214691162, + "step": 92 + }, + { + "epoch": 0.19663032705649158, + "grad_norm": 1.140067458152771, + "learning_rate": 9.724636795262866e-07, + "logits/chosen": -0.43793433904647827, + "logits/rejected": -0.4402340352535248, + "logps/chosen": -0.8155819177627563, + "logps/rejected": -0.8659977912902832, + "loss": 1.3621, + "rewards/accuracies": 0.5, + "rewards/chosen": -1.6311638355255127, + "rewards/margins": 0.10083187371492386, + "rewards/rejected": -1.7319955825805664, + "step": 93 + }, + { + "epoch": 0.19874463164849687, + "grad_norm": 1.8303897380828857, + "learning_rate": 9.712382442456844e-07, + "logits/chosen": -0.34288379549980164, + "logits/rejected": -0.36632782220840454, + "logps/chosen": -0.7338054776191711, + "logps/rejected": -0.8537961840629578, + "loss": 1.2942, + "rewards/accuracies": 0.5859375, + "rewards/chosen": -1.4676109552383423, + "rewards/margins": 0.23998141288757324, + "rewards/rejected": -1.7075923681259155, + "step": 94 + }, + { + "epoch": 0.20085893624050213, + "grad_norm": 0.34392252564430237, + "learning_rate": 9.6998693836071e-07, + "logits/chosen": -0.4381723999977112, + "logits/rejected": -0.4031081199645996, + "logps/chosen": -0.7130292057991028, + "logps/rejected": -0.7402217388153076, + "loss": 1.3766, + "rewards/accuracies": 0.5859375, + "rewards/chosen": -1.4260584115982056, + "rewards/margins": 0.0543849840760231, + "rewards/rejected": -1.4804434776306152, + "step": 95 + }, + { + "epoch": 0.20297324083250742, + "grad_norm": 0.4129842519760132, + "learning_rate": 9.687098305670604e-07, + "logits/chosen": -0.39796924591064453, + "logits/rejected": -0.3476859927177429, + "logps/chosen": -0.7520885467529297, + "logps/rejected": -0.8058558702468872, + "loss": 1.3663, + "rewards/accuracies": 0.53125, + "rewards/chosen": -1.5041770935058594, + "rewards/margins": 0.10753461718559265, + "rewards/rejected": -1.6117117404937744, + "step": 96 + }, + { + "epoch": 0.2050875454245127, + "grad_norm": 0.5054985284805298, + "learning_rate": 9.674069909769362e-07, + "logits/chosen": -0.3942393660545349, + "logits/rejected": -0.3627544045448303, + "logps/chosen": -0.733702540397644, + "logps/rejected": -0.781308650970459, + "loss": 1.342, + "rewards/accuracies": 0.5859375, + "rewards/chosen": -1.467405080795288, + "rewards/margins": 0.09521210938692093, + "rewards/rejected": -1.562617301940918, + "step": 97 + }, + { + "epoch": 0.207201850016518, + "grad_norm": 0.6975870728492737, + "learning_rate": 9.66078491115194e-07, + "logits/chosen": -0.38557127118110657, + "logits/rejected": -0.3581204414367676, + "logps/chosen": -0.7359838485717773, + "logps/rejected": -0.7648134827613831, + "loss": 1.3841, + "rewards/accuracies": 0.5234375, + "rewards/chosen": -1.4719676971435547, + "rewards/margins": 0.057659298181533813, + "rewards/rejected": -1.5296269655227661, + "step": 98 + }, + { + "epoch": 0.2093161546085233, + "grad_norm": 0.7575029730796814, + "learning_rate": 9.647244039154177e-07, + "logits/chosen": -0.3871467411518097, + "logits/rejected": -0.3941374123096466, + "logps/chosen": -0.6516871452331543, + "logps/rejected": -0.7066073417663574, + "loss": 1.3364, + "rewards/accuracies": 0.5390625, + "rewards/chosen": -1.3033742904663086, + "rewards/margins": 0.10984040796756744, + "rewards/rejected": -1.4132146835327148, + "step": 99 + }, + { + "epoch": 0.21143045920052858, + "grad_norm": 1.3344250917434692, + "learning_rate": 9.633448037159166e-07, + "logits/chosen": -0.40887755155563354, + "logits/rejected": -0.41733911633491516, + "logps/chosen": -0.6978950500488281, + "logps/rejected": -0.793424129486084, + "loss": 1.3076, + "rewards/accuracies": 0.609375, + "rewards/chosen": -1.3957901000976562, + "rewards/margins": 0.1910584717988968, + "rewards/rejected": -1.586848258972168, + "step": 100 + }, + { + "epoch": 0.21354476379253387, + "grad_norm": 0.8798456788063049, + "learning_rate": 9.619397662556433e-07, + "logits/chosen": -0.302534282207489, + "logits/rejected": -0.29954588413238525, + "logps/chosen": -0.719552755355835, + "logps/rejected": -0.7628123164176941, + "loss": 1.3699, + "rewards/accuracies": 0.5390625, + "rewards/chosen": -1.43910551071167, + "rewards/margins": 0.08651915192604065, + "rewards/rejected": -1.5256246328353882, + "step": 101 + }, + { + "epoch": 0.21565906838453916, + "grad_norm": 0.8746365308761597, + "learning_rate": 9.605093686700353e-07, + "logits/chosen": -0.372263640165329, + "logits/rejected": -0.3714321255683899, + "logps/chosen": -0.6665956974029541, + "logps/rejected": -0.7361368536949158, + "loss": 1.3173, + "rewards/accuracies": 0.5546875, + "rewards/chosen": -1.3331913948059082, + "rewards/margins": 0.1390824019908905, + "rewards/rejected": -1.4722737073898315, + "step": 102 + }, + { + "epoch": 0.21777337297654442, + "grad_norm": 0.8450930714607239, + "learning_rate": 9.590536894867812e-07, + "logits/chosen": -0.37228280305862427, + "logits/rejected": -0.37763556838035583, + "logps/chosen": -0.7425979375839233, + "logps/rejected": -0.7557005882263184, + "loss": 1.4085, + "rewards/accuracies": 0.5390625, + "rewards/chosen": -1.4851958751678467, + "rewards/margins": 0.026205357164144516, + "rewards/rejected": -1.5114011764526367, + "step": 103 + }, + { + "epoch": 0.2198876775685497, + "grad_norm": 0.5075035691261292, + "learning_rate": 9.575728086215091e-07, + "logits/chosen": -0.4433964788913727, + "logits/rejected": -0.3782787024974823, + "logps/chosen": -0.7308244109153748, + "logps/rejected": -0.8043883442878723, + "loss": 1.3353, + "rewards/accuracies": 0.5703125, + "rewards/chosen": -1.4616488218307495, + "rewards/margins": 0.14712783694267273, + "rewards/rejected": -1.6087766885757446, + "step": 104 + }, + { + "epoch": 0.222001982160555, + "grad_norm": 1.0270946025848389, + "learning_rate": 9.560668073733993e-07, + "logits/chosen": -0.3593980073928833, + "logits/rejected": -0.3159312903881073, + "logps/chosen": -0.757469892501831, + "logps/rejected": -0.8256179094314575, + "loss": 1.3289, + "rewards/accuracies": 0.5546875, + "rewards/chosen": -1.514939785003662, + "rewards/margins": 0.13629598915576935, + "rewards/rejected": -1.651235818862915, + "step": 105 + }, + { + "epoch": 0.2241162867525603, + "grad_norm": 0.8390078544616699, + "learning_rate": 9.54535768420721e-07, + "logits/chosen": -0.3266332149505615, + "logits/rejected": -0.3008713722229004, + "logps/chosen": -0.7286102771759033, + "logps/rejected": -0.7803273797035217, + "loss": 1.3593, + "rewards/accuracies": 0.5078125, + "rewards/chosen": -1.4572205543518066, + "rewards/margins": 0.10343428701162338, + "rewards/rejected": -1.5606547594070435, + "step": 106 + }, + { + "epoch": 0.22623059134456558, + "grad_norm": 1.130595088005066, + "learning_rate": 9.529797758162934e-07, + "logits/chosen": -0.36109817028045654, + "logits/rejected": -0.34797021746635437, + "logps/chosen": -0.7723361253738403, + "logps/rejected": -0.8873662352561951, + "loss": 1.2956, + "rewards/accuracies": 0.625, + "rewards/chosen": -1.5446722507476807, + "rewards/margins": 0.23006024956703186, + "rewards/rejected": -1.7747324705123901, + "step": 107 + }, + { + "epoch": 0.22834489593657087, + "grad_norm": 0.9399718642234802, + "learning_rate": 9.513989149828717e-07, + "logits/chosen": -0.3596777021884918, + "logits/rejected": -0.3660539388656616, + "logps/chosen": -0.7130635976791382, + "logps/rejected": -0.7378955483436584, + "loss": 1.3774, + "rewards/accuracies": 0.4765625, + "rewards/chosen": -1.4261271953582764, + "rewards/margins": 0.04966379329562187, + "rewards/rejected": -1.475791096687317, + "step": 108 + }, + { + "epoch": 0.23045920052857616, + "grad_norm": 1.097594976425171, + "learning_rate": 9.49793272708457e-07, + "logits/chosen": -0.31783169507980347, + "logits/rejected": -0.3008044362068176, + "logps/chosen": -0.6933202147483826, + "logps/rejected": -0.7510000467300415, + "loss": 1.3498, + "rewards/accuracies": 0.5390625, + "rewards/chosen": -1.3866404294967651, + "rewards/margins": 0.1153596043586731, + "rewards/rejected": -1.502000093460083, + "step": 109 + }, + { + "epoch": 0.23257350512058145, + "grad_norm": 1.096330165863037, + "learning_rate": 9.481629371415313e-07, + "logits/chosen": -0.3582899570465088, + "logits/rejected": -0.3120020031929016, + "logps/chosen": -0.817268431186676, + "logps/rejected": -0.8862374424934387, + "loss": 1.3349, + "rewards/accuracies": 0.5625, + "rewards/chosen": -1.634536862373352, + "rewards/margins": 0.13793781399726868, + "rewards/rejected": -1.7724748849868774, + "step": 110 + }, + { + "epoch": 0.2346878097125867, + "grad_norm": 0.8261978626251221, + "learning_rate": 9.465079977862192e-07, + "logits/chosen": -0.41336673498153687, + "logits/rejected": -0.39544352889060974, + "logps/chosen": -0.7673372030258179, + "logps/rejected": -0.8331737518310547, + "loss": 1.3373, + "rewards/accuracies": 0.59375, + "rewards/chosen": -1.5346744060516357, + "rewards/margins": 0.1316729635000229, + "rewards/rejected": -1.6663475036621094, + "step": 111 + }, + { + "epoch": 0.236802114304592, + "grad_norm": 0.5922806262969971, + "learning_rate": 9.448285454973737e-07, + "logits/chosen": -0.3224758207798004, + "logits/rejected": -0.3118049204349518, + "logps/chosen": -0.7584627866744995, + "logps/rejected": -0.8859898447990417, + "loss": 1.2731, + "rewards/accuracies": 0.625, + "rewards/chosen": -1.516925573348999, + "rewards/margins": 0.2550540566444397, + "rewards/rejected": -1.7719796895980835, + "step": 112 + }, + { + "epoch": 0.2389164188965973, + "grad_norm": 1.3172541856765747, + "learning_rate": 9.431246724755877e-07, + "logits/chosen": -0.4287208318710327, + "logits/rejected": -0.3984590172767639, + "logps/chosen": -0.7587048411369324, + "logps/rejected": -0.7860502004623413, + "loss": 1.3832, + "rewards/accuracies": 0.4765625, + "rewards/chosen": -1.5174096822738647, + "rewards/margins": 0.05469079315662384, + "rewards/rejected": -1.5721004009246826, + "step": 113 + }, + { + "epoch": 0.24103072348860258, + "grad_norm": 0.7749882340431213, + "learning_rate": 9.413964722621337e-07, + "logits/chosen": -0.39085906744003296, + "logits/rejected": -0.3316206932067871, + "logps/chosen": -0.7035898566246033, + "logps/rejected": -0.7375759482383728, + "loss": 1.387, + "rewards/accuracies": 0.5546875, + "rewards/chosen": -1.4071797132492065, + "rewards/margins": 0.06797221302986145, + "rewards/rejected": -1.4751518964767456, + "step": 114 + }, + { + "epoch": 0.24314502808060787, + "grad_norm": 1.0914056301116943, + "learning_rate": 9.396440397338272e-07, + "logits/chosen": -0.38826486468315125, + "logits/rejected": -0.35520774126052856, + "logps/chosen": -0.7385872602462769, + "logps/rejected": -0.7974889278411865, + "loss": 1.3477, + "rewards/accuracies": 0.5, + "rewards/chosen": -1.4771745204925537, + "rewards/margins": 0.11780343949794769, + "rewards/rejected": -1.594977855682373, + "step": 115 + }, + { + "epoch": 0.24525933267261316, + "grad_norm": 1.2966018915176392, + "learning_rate": 9.378674710978183e-07, + "logits/chosen": -0.36493802070617676, + "logits/rejected": -0.34763696789741516, + "logps/chosen": -0.6731826663017273, + "logps/rejected": -0.7645149827003479, + "loss": 1.3, + "rewards/accuracies": 0.5859375, + "rewards/chosen": -1.3463653326034546, + "rewards/margins": 0.18266455829143524, + "rewards/rejected": -1.5290299654006958, + "step": 116 + }, + { + "epoch": 0.24737363726461845, + "grad_norm": 0.49401605129241943, + "learning_rate": 9.360668638863109e-07, + "logits/chosen": -0.40416795015335083, + "logits/rejected": -0.3815993070602417, + "logps/chosen": -0.719497799873352, + "logps/rejected": -0.7588324546813965, + "loss": 1.3621, + "rewards/accuracies": 0.5625, + "rewards/chosen": -1.438995599746704, + "rewards/margins": 0.07866920530796051, + "rewards/rejected": -1.517664909362793, + "step": 117 + }, + { + "epoch": 0.2494879418566237, + "grad_norm": 1.0603238344192505, + "learning_rate": 9.342423169512071e-07, + "logits/chosen": -0.3857055604457855, + "logits/rejected": -0.3524513244628906, + "logps/chosen": -0.7373769283294678, + "logps/rejected": -0.7971038818359375, + "loss": 1.3358, + "rewards/accuracies": 0.59375, + "rewards/chosen": -1.4747538566589355, + "rewards/margins": 0.11945393681526184, + "rewards/rejected": -1.594207763671875, + "step": 118 + }, + { + "epoch": 0.251602246448629, + "grad_norm": 0.9880490303039551, + "learning_rate": 9.323939304586804e-07, + "logits/chosen": -0.31455785036087036, + "logits/rejected": -0.3102484941482544, + "logps/chosen": -0.7276102900505066, + "logps/rejected": -0.7446941137313843, + "loss": 1.3928, + "rewards/accuracies": 0.546875, + "rewards/chosen": -1.4552205801010132, + "rewards/margins": 0.034167706966400146, + "rewards/rejected": -1.4893882274627686, + "step": 119 + }, + { + "epoch": 0.2537165510406343, + "grad_norm": 1.0419566631317139, + "learning_rate": 9.305218058836776e-07, + "logits/chosen": -0.38093918561935425, + "logits/rejected": -0.3588898181915283, + "logps/chosen": -0.715582013130188, + "logps/rejected": -0.8271002769470215, + "loss": 1.2934, + "rewards/accuracies": 0.6015625, + "rewards/chosen": -1.431164026260376, + "rewards/margins": 0.22303667664527893, + "rewards/rejected": -1.654200553894043, + "step": 120 + }, + { + "epoch": 0.2558308556326396, + "grad_norm": 0.657620370388031, + "learning_rate": 9.286260460043473e-07, + "logits/chosen": -0.45690783858299255, + "logits/rejected": -0.4082674980163574, + "logps/chosen": -0.6932571530342102, + "logps/rejected": -0.7631082534790039, + "loss": 1.3398, + "rewards/accuracies": 0.5546875, + "rewards/chosen": -1.3865143060684204, + "rewards/margins": 0.13970226049423218, + "rewards/rejected": -1.5262165069580078, + "step": 121 + }, + { + "epoch": 0.25794516022464486, + "grad_norm": 0.983686089515686, + "learning_rate": 9.267067548963974e-07, + "logits/chosen": -0.40266987681388855, + "logits/rejected": -0.37586671113967896, + "logps/chosen": -0.7362720966339111, + "logps/rejected": -0.7538987398147583, + "loss": 1.4066, + "rewards/accuracies": 0.5390625, + "rewards/chosen": -1.4725441932678223, + "rewards/margins": 0.03525342047214508, + "rewards/rejected": -1.5077974796295166, + "step": 122 + }, + { + "epoch": 0.26005946481665015, + "grad_norm": 1.0076361894607544, + "learning_rate": 9.24764037927381e-07, + "logits/chosen": -0.4461461007595062, + "logits/rejected": -0.40700826048851013, + "logps/chosen": -0.7206646800041199, + "logps/rejected": -0.7489192485809326, + "loss": 1.3759, + "rewards/accuracies": 0.5625, + "rewards/chosen": -1.4413293600082397, + "rewards/margins": 0.05650928616523743, + "rewards/rejected": -1.4978384971618652, + "step": 123 + }, + { + "epoch": 0.26217376940865544, + "grad_norm": 0.933315098285675, + "learning_rate": 9.22798001750913e-07, + "logits/chosen": -0.3966676890850067, + "logits/rejected": -0.3572196960449219, + "logps/chosen": -0.7075096368789673, + "logps/rejected": -0.7406759262084961, + "loss": 1.3667, + "rewards/accuracies": 0.6171875, + "rewards/chosen": -1.4150192737579346, + "rewards/margins": 0.06633266806602478, + "rewards/rejected": -1.4813518524169922, + "step": 124 + }, + { + "epoch": 0.26428807400066073, + "grad_norm": 0.6277392506599426, + "learning_rate": 9.20808754300814e-07, + "logits/chosen": -0.3555490970611572, + "logits/rejected": -0.35786163806915283, + "logps/chosen": -0.7549921274185181, + "logps/rejected": -0.832869291305542, + "loss": 1.3175, + "rewards/accuracies": 0.5546875, + "rewards/chosen": -1.5099842548370361, + "rewards/margins": 0.15575438737869263, + "rewards/rejected": -1.665738582611084, + "step": 125 + }, + { + "epoch": 0.266402378592666, + "grad_norm": 0.7172744274139404, + "learning_rate": 9.18796404785185e-07, + "logits/chosen": -0.41230690479278564, + "logits/rejected": -0.39935630559921265, + "logps/chosen": -0.7129833698272705, + "logps/rejected": -0.7888559103012085, + "loss": 1.3167, + "rewards/accuracies": 0.5625, + "rewards/chosen": -1.425966739654541, + "rewards/margins": 0.15174514055252075, + "rewards/rejected": -1.577711820602417, + "step": 126 + }, + { + "epoch": 0.2685166831846713, + "grad_norm": 1.2629508972167969, + "learning_rate": 9.16761063680412e-07, + "logits/chosen": -0.36754000186920166, + "logits/rejected": -0.3541562259197235, + "logps/chosen": -0.6992133855819702, + "logps/rejected": -0.7668892741203308, + "loss": 1.3735, + "rewards/accuracies": 0.578125, + "rewards/chosen": -1.3984267711639404, + "rewards/margins": 0.13535188138484955, + "rewards/rejected": -1.5337785482406616, + "step": 127 + }, + { + "epoch": 0.2706309877766766, + "grad_norm": 0.7024405598640442, + "learning_rate": 9.147028427251009e-07, + "logits/chosen": -0.4014585018157959, + "logits/rejected": -0.40560898184776306, + "logps/chosen": -0.727234959602356, + "logps/rejected": -0.8070081472396851, + "loss": 1.3138, + "rewards/accuracies": 0.625, + "rewards/chosen": -1.454469919204712, + "rewards/margins": 0.15954652428627014, + "rewards/rejected": -1.6140162944793701, + "step": 128 + }, + { + "epoch": 0.2727452923686819, + "grad_norm": 1.6173532009124756, + "learning_rate": 9.126218549139433e-07, + "logits/chosen": -0.32572367787361145, + "logits/rejected": -0.3470613956451416, + "logps/chosen": -0.7555541396141052, + "logps/rejected": -0.8856738209724426, + "loss": 1.2461, + "rewards/accuracies": 0.6796875, + "rewards/chosen": -1.5111082792282104, + "rewards/margins": 0.26023951172828674, + "rewards/rejected": -1.7713476419448853, + "step": 129 + }, + { + "epoch": 0.2748595969606871, + "grad_norm": 0.5878487229347229, + "learning_rate": 9.105182144915129e-07, + "logits/chosen": -0.39267170429229736, + "logits/rejected": -0.3448992967605591, + "logps/chosen": -0.6776289343833923, + "logps/rejected": -0.7530183792114258, + "loss": 1.3242, + "rewards/accuracies": 0.609375, + "rewards/chosen": -1.3552578687667847, + "rewards/margins": 0.15077897906303406, + "rewards/rejected": -1.5060367584228516, + "step": 130 + }, + { + "epoch": 0.2769739015526924, + "grad_norm": 0.43264809250831604, + "learning_rate": 9.08392036945994e-07, + "logits/chosen": -0.39980950951576233, + "logits/rejected": -0.4247930645942688, + "logps/chosen": -0.7898982167243958, + "logps/rejected": -0.8856299519538879, + "loss": 1.3004, + "rewards/accuracies": 0.59375, + "rewards/chosen": -1.5797964334487915, + "rewards/margins": 0.19146347045898438, + "rewards/rejected": -1.7712599039077759, + "step": 131 + }, + { + "epoch": 0.2790882061446977, + "grad_norm": 1.0348538160324097, + "learning_rate": 9.062434390028407e-07, + "logits/chosen": -0.35729700326919556, + "logits/rejected": -0.3265542984008789, + "logps/chosen": -0.7120587229728699, + "logps/rejected": -0.771691083908081, + "loss": 1.3374, + "rewards/accuracies": 0.59375, + "rewards/chosen": -1.4241174459457397, + "rewards/margins": 0.11926469206809998, + "rewards/rejected": -1.543382167816162, + "step": 132 + }, + { + "epoch": 0.281202510736703, + "grad_norm": 2.0902225971221924, + "learning_rate": 9.04072538618369e-07, + "logits/chosen": -0.4942469298839569, + "logits/rejected": -0.48699846863746643, + "logps/chosen": -0.7882512211799622, + "logps/rejected": -0.8270165920257568, + "loss": 1.3715, + "rewards/accuracies": 0.5703125, + "rewards/chosen": -1.5765024423599243, + "rewards/margins": 0.07753071188926697, + "rewards/rejected": -1.6540331840515137, + "step": 133 + }, + { + "epoch": 0.2833168153287083, + "grad_norm": 1.6436113119125366, + "learning_rate": 9.018794549732817e-07, + "logits/chosen": -0.41133156418800354, + "logits/rejected": -0.4146718382835388, + "logps/chosen": -0.779824435710907, + "logps/rejected": -0.9421006441116333, + "loss": 1.2521, + "rewards/accuracies": 0.625, + "rewards/chosen": -1.559648871421814, + "rewards/margins": 0.324552446603775, + "rewards/rejected": -1.8842012882232666, + "step": 134 + }, + { + "epoch": 0.28543111992071357, + "grad_norm": 0.8831859827041626, + "learning_rate": 8.996643084661244e-07, + "logits/chosen": -0.42452165484428406, + "logits/rejected": -0.3798604905605316, + "logps/chosen": -0.6499216556549072, + "logps/rejected": -0.7796702980995178, + "loss": 1.2581, + "rewards/accuracies": 0.625, + "rewards/chosen": -1.2998433113098145, + "rewards/margins": 0.25949734449386597, + "rewards/rejected": -1.5593405961990356, + "step": 135 + }, + { + "epoch": 0.28754542451271886, + "grad_norm": 0.8031218647956848, + "learning_rate": 8.974272207066767e-07, + "logits/chosen": -0.38131940364837646, + "logits/rejected": -0.3854255676269531, + "logps/chosen": -0.7026851773262024, + "logps/rejected": -0.762391209602356, + "loss": 1.3333, + "rewards/accuracies": 0.5625, + "rewards/chosen": -1.4053703546524048, + "rewards/margins": 0.11941206455230713, + "rewards/rejected": -1.524782419204712, + "step": 136 + }, + { + "epoch": 0.28965972910472415, + "grad_norm": 1.4455821514129639, + "learning_rate": 8.951683145092748e-07, + "logits/chosen": -0.42824965715408325, + "logits/rejected": -0.4320424795150757, + "logps/chosen": -0.7893270254135132, + "logps/rejected": -0.8517144322395325, + "loss": 1.3652, + "rewards/accuracies": 0.5234375, + "rewards/chosen": -1.5786540508270264, + "rewards/margins": 0.12477481365203857, + "rewards/rejected": -1.703428864479065, + "step": 137 + }, + { + "epoch": 0.29177403369672944, + "grad_norm": 0.6299450397491455, + "learning_rate": 8.928877138860706e-07, + "logits/chosen": -0.4388589560985565, + "logits/rejected": -0.40156903862953186, + "logps/chosen": -0.7346572875976562, + "logps/rejected": -0.8166492581367493, + "loss": 1.3134, + "rewards/accuracies": 0.5703125, + "rewards/chosen": -1.4693145751953125, + "rewards/margins": 0.16398391127586365, + "rewards/rejected": -1.6332985162734985, + "step": 138 + }, + { + "epoch": 0.29388833828873473, + "grad_norm": 2.784437417984009, + "learning_rate": 8.905855440402224e-07, + "logits/chosen": -0.405662477016449, + "logits/rejected": -0.35549795627593994, + "logps/chosen": -0.7482771277427673, + "logps/rejected": -0.795568585395813, + "loss": 1.3656, + "rewards/accuracies": 0.578125, + "rewards/chosen": -1.4965542554855347, + "rewards/margins": 0.09458285570144653, + "rewards/rejected": -1.591137170791626, + "step": 139 + }, + { + "epoch": 0.29600264288074, + "grad_norm": 0.4958692193031311, + "learning_rate": 8.882619313590212e-07, + "logits/chosen": -0.3814452886581421, + "logits/rejected": -0.35715553164482117, + "logps/chosen": -0.7731542587280273, + "logps/rejected": -0.8285202980041504, + "loss": 1.3776, + "rewards/accuracies": 0.515625, + "rewards/chosen": -1.5463085174560547, + "rewards/margins": 0.11073210835456848, + "rewards/rejected": -1.6570405960083008, + "step": 140 + }, + { + "epoch": 0.2981169474727453, + "grad_norm": 0.4597362279891968, + "learning_rate": 8.859170034069532e-07, + "logits/chosen": -0.388383150100708, + "logits/rejected": -0.4071737229824066, + "logps/chosen": -0.7263504266738892, + "logps/rejected": -0.769676148891449, + "loss": 1.3712, + "rewards/accuracies": 0.53125, + "rewards/chosen": -1.4527008533477783, + "rewards/margins": 0.08665145933628082, + "rewards/rejected": -1.539352297782898, + "step": 141 + }, + { + "epoch": 0.3002312520647506, + "grad_norm": 0.4914930760860443, + "learning_rate": 8.835508889186956e-07, + "logits/chosen": -0.41084378957748413, + "logits/rejected": -0.3823031187057495, + "logps/chosen": -0.7565821409225464, + "logps/rejected": -0.9084322452545166, + "loss": 1.2717, + "rewards/accuracies": 0.578125, + "rewards/chosen": -1.5131642818450928, + "rewards/margins": 0.3037002384662628, + "rewards/rejected": -1.8168644905090332, + "step": 142 + }, + { + "epoch": 0.3023455566567559, + "grad_norm": 2.0075581073760986, + "learning_rate": 8.811637177920499e-07, + "logits/chosen": -0.4438302516937256, + "logits/rejected": -0.4916025698184967, + "logps/chosen": -0.800719141960144, + "logps/rejected": -0.8658267855644226, + "loss": 1.358, + "rewards/accuracies": 0.515625, + "rewards/chosen": -1.601438283920288, + "rewards/margins": 0.1302153617143631, + "rewards/rejected": -1.7316535711288452, + "step": 143 + }, + { + "epoch": 0.3044598612487612, + "grad_norm": 1.1243022680282593, + "learning_rate": 8.7875562108081e-07, + "logits/chosen": -0.40519949793815613, + "logits/rejected": -0.3905750811100006, + "logps/chosen": -0.689585268497467, + "logps/rejected": -0.7312421798706055, + "loss": 1.3503, + "rewards/accuracies": 0.5703125, + "rewards/chosen": -1.379170536994934, + "rewards/margins": 0.08331384509801865, + "rewards/rejected": -1.462484359741211, + "step": 144 + }, + { + "epoch": 0.3065741658407664, + "grad_norm": 0.7543137669563293, + "learning_rate": 8.76326730987568e-07, + "logits/chosen": -0.4696752727031708, + "logits/rejected": -0.4357326626777649, + "logps/chosen": -0.7813425660133362, + "logps/rejected": -0.8276973962783813, + "loss": 1.3794, + "rewards/accuracies": 0.5234375, + "rewards/chosen": -1.5626851320266724, + "rewards/margins": 0.09270970523357391, + "rewards/rejected": -1.6553947925567627, + "step": 145 + }, + { + "epoch": 0.3086884704327717, + "grad_norm": 1.3136053085327148, + "learning_rate": 8.738771808564555e-07, + "logits/chosen": -0.4262731075286865, + "logits/rejected": -0.44038820266723633, + "logps/chosen": -0.697494387626648, + "logps/rejected": -0.8369535803794861, + "loss": 1.2699, + "rewards/accuracies": 0.609375, + "rewards/chosen": -1.394988775253296, + "rewards/margins": 0.2789183557033539, + "rewards/rejected": -1.6739071607589722, + "step": 146 + }, + { + "epoch": 0.310802775024777, + "grad_norm": 2.221562385559082, + "learning_rate": 8.714071051658245e-07, + "logits/chosen": -0.40089336037635803, + "logits/rejected": -0.37991875410079956, + "logps/chosen": -0.7704445123672485, + "logps/rejected": -0.859091579914093, + "loss": 1.2987, + "rewards/accuracies": 0.5859375, + "rewards/chosen": -1.540889024734497, + "rewards/margins": 0.17729414999485016, + "rewards/rejected": -1.718183159828186, + "step": 147 + }, + { + "epoch": 0.3129170796167823, + "grad_norm": 1.5049912929534912, + "learning_rate": 8.689166395208636e-07, + "logits/chosen": -0.38984015583992004, + "logits/rejected": -0.35900723934173584, + "logps/chosen": -0.6424779891967773, + "logps/rejected": -0.7145389318466187, + "loss": 1.3261, + "rewards/accuracies": 0.5703125, + "rewards/chosen": -1.2849559783935547, + "rewards/margins": 0.14412200450897217, + "rewards/rejected": -1.4290778636932373, + "step": 148 + }, + { + "epoch": 0.31503138420878757, + "grad_norm": 0.36125388741493225, + "learning_rate": 8.664059206461534e-07, + "logits/chosen": -0.3490441143512726, + "logits/rejected": -0.3219914436340332, + "logps/chosen": -0.7200264930725098, + "logps/rejected": -0.7924249768257141, + "loss": 1.3476, + "rewards/accuracies": 0.5625, + "rewards/chosen": -1.4400529861450195, + "rewards/margins": 0.1447969526052475, + "rewards/rejected": -1.5848499536514282, + "step": 149 + }, + { + "epoch": 0.31714568880079286, + "grad_norm": 1.039840579032898, + "learning_rate": 8.638750863781612e-07, + "logits/chosen": -0.40701645612716675, + "logits/rejected": -0.406186580657959, + "logps/chosen": -0.7083575129508972, + "logps/rejected": -0.7766748070716858, + "loss": 1.3263, + "rewards/accuracies": 0.5390625, + "rewards/chosen": -1.4167150259017944, + "rewards/margins": 0.1366347074508667, + "rewards/rejected": -1.5533496141433716, + "step": 150 + }, + { + "epoch": 0.31925999339279815, + "grad_norm": 0.7128564119338989, + "learning_rate": 8.613242756576728e-07, + "logits/chosen": -0.40932926535606384, + "logits/rejected": -0.4234562814235687, + "logps/chosen": -0.6775843501091003, + "logps/rejected": -0.7866222858428955, + "loss": 1.2834, + "rewards/accuracies": 0.6015625, + "rewards/chosen": -1.3551687002182007, + "rewards/margins": 0.2180757373571396, + "rewards/rejected": -1.573244571685791, + "step": 151 + }, + { + "epoch": 0.32137429798480344, + "grad_norm": 1.1701059341430664, + "learning_rate": 8.587536285221655e-07, + "logits/chosen": -0.3654797077178955, + "logits/rejected": -0.3181680738925934, + "logps/chosen": -0.6686022877693176, + "logps/rejected": -0.7058504223823547, + "loss": 1.3612, + "rewards/accuracies": 0.546875, + "rewards/chosen": -1.3372045755386353, + "rewards/margins": 0.07449636608362198, + "rewards/rejected": -1.4117008447647095, + "step": 152 + }, + { + "epoch": 0.3234886025768087, + "grad_norm": 0.8239700794219971, + "learning_rate": 8.561632860981204e-07, + "logits/chosen": -0.42527130246162415, + "logits/rejected": -0.4091627299785614, + "logps/chosen": -0.6969794631004333, + "logps/rejected": -0.8019355535507202, + "loss": 1.2974, + "rewards/accuracies": 0.578125, + "rewards/chosen": -1.3939589262008667, + "rewards/margins": 0.20991206169128418, + "rewards/rejected": -1.6038711071014404, + "step": 153 + }, + { + "epoch": 0.325602907168814, + "grad_norm": 1.4885636568069458, + "learning_rate": 8.535533905932737e-07, + "logits/chosen": -0.4126192331314087, + "logits/rejected": -0.41548141837120056, + "logps/chosen": -0.7076549530029297, + "logps/rejected": -0.7940821051597595, + "loss": 1.3198, + "rewards/accuracies": 0.625, + "rewards/chosen": -1.4153099060058594, + "rewards/margins": 0.17285437881946564, + "rewards/rejected": -1.588164210319519, + "step": 154 + }, + { + "epoch": 0.3277172117608193, + "grad_norm": 1.439434289932251, + "learning_rate": 8.509240852888106e-07, + "logits/chosen": -0.3763914704322815, + "logits/rejected": -0.3617165684700012, + "logps/chosen": -0.7189474105834961, + "logps/rejected": -0.827629804611206, + "loss": 1.2816, + "rewards/accuracies": 0.625, + "rewards/chosen": -1.4378948211669922, + "rewards/margins": 0.2173648476600647, + "rewards/rejected": -1.655259609222412, + "step": 155 + }, + { + "epoch": 0.3298315163528246, + "grad_norm": 1.4505418539047241, + "learning_rate": 8.482755145314985e-07, + "logits/chosen": -0.37879478931427, + "logits/rejected": -0.38689684867858887, + "logps/chosen": -0.7011865973472595, + "logps/rejected": -0.8019431829452515, + "loss": 1.3158, + "rewards/accuracies": 0.5546875, + "rewards/chosen": -1.402373194694519, + "rewards/margins": 0.2015131413936615, + "rewards/rejected": -1.603886365890503, + "step": 156 + }, + { + "epoch": 0.3319458209448299, + "grad_norm": 2.0968713760375977, + "learning_rate": 8.45607823725763e-07, + "logits/chosen": -0.4366365075111389, + "logits/rejected": -0.41210681200027466, + "logps/chosen": -0.6455651521682739, + "logps/rejected": -0.7228428721427917, + "loss": 1.3247, + "rewards/accuracies": 0.5546875, + "rewards/chosen": -1.2911303043365479, + "rewards/margins": 0.1545555144548416, + "rewards/rejected": -1.4456857442855835, + "step": 157 + }, + { + "epoch": 0.3340601255368352, + "grad_norm": 0.6716106534004211, + "learning_rate": 8.429211593257052e-07, + "logits/chosen": -0.42992207407951355, + "logits/rejected": -0.4105672836303711, + "logps/chosen": -0.6981461048126221, + "logps/rejected": -0.7909567952156067, + "loss": 1.3128, + "rewards/accuracies": 0.5859375, + "rewards/chosen": -1.3962922096252441, + "rewards/margins": 0.1856214702129364, + "rewards/rejected": -1.5819135904312134, + "step": 158 + }, + { + "epoch": 0.33617443012884046, + "grad_norm": 2.4430501461029053, + "learning_rate": 8.402156688270612e-07, + "logits/chosen": -0.4184916317462921, + "logits/rejected": -0.3943992257118225, + "logps/chosen": -0.6568948030471802, + "logps/rejected": -0.7506390810012817, + "loss": 1.2992, + "rewards/accuracies": 0.5859375, + "rewards/chosen": -1.3137896060943604, + "rewards/margins": 0.18748846650123596, + "rewards/rejected": -1.5012781620025635, + "step": 159 + }, + { + "epoch": 0.3382887347208457, + "grad_norm": 2.0322091579437256, + "learning_rate": 8.374915007591052e-07, + "logits/chosen": -0.4713057577610016, + "logits/rejected": -0.42163771390914917, + "logps/chosen": -0.7347853779792786, + "logps/rejected": -0.7770044207572937, + "loss": 1.3801, + "rewards/accuracies": 0.5078125, + "rewards/chosen": -1.4695707559585571, + "rewards/margins": 0.0844380110502243, + "rewards/rejected": -1.5540088415145874, + "step": 160 + }, + { + "epoch": 0.340403039312851, + "grad_norm": 0.4045500159263611, + "learning_rate": 8.347488046764948e-07, + "logits/chosen": -0.39465126395225525, + "logits/rejected": -0.3961923122406006, + "logps/chosen": -0.601732075214386, + "logps/rejected": -0.694148600101471, + "loss": 1.2859, + "rewards/accuracies": 0.6328125, + "rewards/chosen": -1.203464150428772, + "rewards/margins": 0.18483319878578186, + "rewards/rejected": -1.388297200202942, + "step": 161 + }, + { + "epoch": 0.3425173439048563, + "grad_norm": 2.79396915435791, + "learning_rate": 8.319877311510612e-07, + "logits/chosen": -0.4311378002166748, + "logits/rejected": -0.4248836636543274, + "logps/chosen": -0.6813413500785828, + "logps/rejected": -0.775830864906311, + "loss": 1.3001, + "rewards/accuracies": 0.578125, + "rewards/chosen": -1.3626827001571655, + "rewards/margins": 0.18897925317287445, + "rewards/rejected": -1.551661729812622, + "step": 162 + }, + { + "epoch": 0.34463164849686156, + "grad_norm": 0.714146077632904, + "learning_rate": 8.292084317635419e-07, + "logits/chosen": -0.4060715436935425, + "logits/rejected": -0.3770482540130615, + "logps/chosen": -0.7176523208618164, + "logps/rejected": -0.7973593473434448, + "loss": 1.324, + "rewards/accuracies": 0.5859375, + "rewards/chosen": -1.4353046417236328, + "rewards/margins": 0.15941408276557922, + "rewards/rejected": -1.5947186946868896, + "step": 163 + }, + { + "epoch": 0.34674595308886685, + "grad_norm": 1.6007037162780762, + "learning_rate": 8.264110590952607e-07, + "logits/chosen": -0.49063974618911743, + "logits/rejected": -0.5119628310203552, + "logps/chosen": -0.7263911366462708, + "logps/rejected": -0.9138184785842896, + "loss": 1.2439, + "rewards/accuracies": 0.6484375, + "rewards/chosen": -1.4527822732925415, + "rewards/margins": 0.3748546540737152, + "rewards/rejected": -1.827636957168579, + "step": 164 + }, + { + "epoch": 0.34886025768087214, + "grad_norm": 1.4566830396652222, + "learning_rate": 8.235957667197494e-07, + "logits/chosen": -0.4681779146194458, + "logits/rejected": -0.46475380659103394, + "logps/chosen": -0.6923782229423523, + "logps/rejected": -0.7901281118392944, + "loss": 1.295, + "rewards/accuracies": 0.5703125, + "rewards/chosen": -1.3847564458847046, + "rewards/margins": 0.19549959897994995, + "rewards/rejected": -1.5802562236785889, + "step": 165 + }, + { + "epoch": 0.35097456227287743, + "grad_norm": 3.0825328826904297, + "learning_rate": 8.207627091943177e-07, + "logits/chosen": -0.4294862151145935, + "logits/rejected": -0.42411237955093384, + "logps/chosen": -0.6851246356964111, + "logps/rejected": -0.7844961881637573, + "loss": 1.2871, + "rewards/accuracies": 0.625, + "rewards/chosen": -1.3702492713928223, + "rewards/margins": 0.19874317944049835, + "rewards/rejected": -1.5689923763275146, + "step": 166 + }, + { + "epoch": 0.3530888668648827, + "grad_norm": 1.0783339738845825, + "learning_rate": 8.179120420515675e-07, + "logits/chosen": -0.4528030455112457, + "logits/rejected": -0.4626815617084503, + "logps/chosen": -0.703376293182373, + "logps/rejected": -0.8752757906913757, + "loss": 1.2193, + "rewards/accuracies": 0.703125, + "rewards/chosen": -1.406752586364746, + "rewards/margins": 0.34379899501800537, + "rewards/rejected": -1.7505515813827515, + "step": 167 + }, + { + "epoch": 0.355203171456888, + "grad_norm": 2.6788036823272705, + "learning_rate": 8.150439217908556e-07, + "logits/chosen": -0.44946759939193726, + "logits/rejected": -0.47430264949798584, + "logps/chosen": -0.751136839389801, + "logps/rejected": -0.874577522277832, + "loss": 1.29, + "rewards/accuracies": 0.578125, + "rewards/chosen": -1.502273678779602, + "rewards/margins": 0.24688144028186798, + "rewards/rejected": -1.749155044555664, + "step": 168 + }, + { + "epoch": 0.3573174760488933, + "grad_norm": 0.9087730050086975, + "learning_rate": 8.121585058696999e-07, + "logits/chosen": -0.47294262051582336, + "logits/rejected": -0.46765226125717163, + "logps/chosen": -0.7291173934936523, + "logps/rejected": -0.7999277114868164, + "loss": 1.3482, + "rewards/accuracies": 0.6015625, + "rewards/chosen": -1.4582347869873047, + "rewards/margins": 0.1416206806898117, + "rewards/rejected": -1.5998554229736328, + "step": 169 + }, + { + "epoch": 0.3594317806408986, + "grad_norm": 3.392674207687378, + "learning_rate": 8.092559526951374e-07, + "logits/chosen": -0.5026620626449585, + "logits/rejected": -0.46620574593544006, + "logps/chosen": -0.746992290019989, + "logps/rejected": -0.8266301155090332, + "loss": 1.3202, + "rewards/accuracies": 0.6328125, + "rewards/chosen": -1.493984580039978, + "rewards/margins": 0.15927578508853912, + "rewards/rejected": -1.6532602310180664, + "step": 170 + }, + { + "epoch": 0.3615460852329039, + "grad_norm": 1.27628755569458, + "learning_rate": 8.063364216150256e-07, + "logits/chosen": -0.5211395025253296, + "logits/rejected": -0.5419963598251343, + "logps/chosen": -0.7919114828109741, + "logps/rejected": -0.8731362223625183, + "loss": 1.3228, + "rewards/accuracies": 0.5703125, + "rewards/chosen": -1.5838229656219482, + "rewards/margins": 0.16244953870773315, + "rewards/rejected": -1.7462724447250366, + "step": 171 + }, + { + "epoch": 0.36366038982490917, + "grad_norm": 0.8269656896591187, + "learning_rate": 8.034000729092967e-07, + "logits/chosen": -0.49545183777809143, + "logits/rejected": -0.4716613292694092, + "logps/chosen": -0.719520092010498, + "logps/rejected": -0.7876347303390503, + "loss": 1.3367, + "rewards/accuracies": 0.5625, + "rewards/chosen": -1.439040184020996, + "rewards/margins": 0.13622930645942688, + "rewards/rejected": -1.5752694606781006, + "step": 172 + }, + { + "epoch": 0.36577469441691446, + "grad_norm": 0.6049383282661438, + "learning_rate": 8.004470677811559e-07, + "logits/chosen": -0.45276379585266113, + "logits/rejected": -0.42617955803871155, + "logps/chosen": -0.7097947597503662, + "logps/rejected": -0.7606989145278931, + "loss": 1.3909, + "rewards/accuracies": 0.5703125, + "rewards/chosen": -1.4195895195007324, + "rewards/margins": 0.10180822014808655, + "rewards/rejected": -1.5213978290557861, + "step": 173 + }, + { + "epoch": 0.36788899900891975, + "grad_norm": 3.980013847351074, + "learning_rate": 7.974775683482337e-07, + "logits/chosen": -0.4783569574356079, + "logits/rejected": -0.43521156907081604, + "logps/chosen": -0.7623491287231445, + "logps/rejected": -0.8719285130500793, + "loss": 1.2838, + "rewards/accuracies": 0.5859375, + "rewards/chosen": -1.524698257446289, + "rewards/margins": 0.2191585898399353, + "rewards/rejected": -1.7438570261001587, + "step": 174 + }, + { + "epoch": 0.370003303600925, + "grad_norm": 1.024530053138733, + "learning_rate": 7.94491737633684e-07, + "logits/chosen": -0.5009916424751282, + "logits/rejected": -0.48874592781066895, + "logps/chosen": -0.7552992701530457, + "logps/rejected": -0.8485872745513916, + "loss": 1.3153, + "rewards/accuracies": 0.6015625, + "rewards/chosen": -1.5105985403060913, + "rewards/margins": 0.18657605350017548, + "rewards/rejected": -1.6971745491027832, + "step": 175 + }, + { + "epoch": 0.37211760819293027, + "grad_norm": 1.5952919721603394, + "learning_rate": 7.91489739557236e-07, + "logits/chosen": -0.4424138069152832, + "logits/rejected": -0.4334307312965393, + "logps/chosen": -0.6956002116203308, + "logps/rejected": -0.8018803000450134, + "loss": 1.3011, + "rewards/accuracies": 0.59375, + "rewards/chosen": -1.3912004232406616, + "rewards/margins": 0.21256020665168762, + "rewards/rejected": -1.6037606000900269, + "step": 176 + }, + { + "epoch": 0.37423191278493556, + "grad_norm": 1.8331164121627808, + "learning_rate": 7.884717389261934e-07, + "logits/chosen": -0.4836267828941345, + "logits/rejected": -0.5018677115440369, + "logps/chosen": -0.7895969152450562, + "logps/rejected": -0.927432656288147, + "loss": 1.2467, + "rewards/accuracies": 0.625, + "rewards/chosen": -1.5791938304901123, + "rewards/margins": 0.27567166090011597, + "rewards/rejected": -1.854865312576294, + "step": 177 + }, + { + "epoch": 0.37634621737694085, + "grad_norm": 2.165984869003296, + "learning_rate": 7.854379014263876e-07, + "logits/chosen": -0.46125832200050354, + "logits/rejected": -0.39802712202072144, + "logps/chosen": -0.8382925391197205, + "logps/rejected": -0.9422982931137085, + "loss": 1.339, + "rewards/accuracies": 0.5546875, + "rewards/chosen": -1.676585078239441, + "rewards/margins": 0.20801125466823578, + "rewards/rejected": -1.884596586227417, + "step": 178 + }, + { + "epoch": 0.37846052196894614, + "grad_norm": 0.522197425365448, + "learning_rate": 7.823883936130817e-07, + "logits/chosen": -0.4747823476791382, + "logits/rejected": -0.4888593554496765, + "logps/chosen": -0.723059892654419, + "logps/rejected": -0.84626305103302, + "loss": 1.2708, + "rewards/accuracies": 0.6171875, + "rewards/chosen": -1.446119785308838, + "rewards/margins": 0.24640652537345886, + "rewards/rejected": -1.69252610206604, + "step": 179 + }, + { + "epoch": 0.38057482656095143, + "grad_norm": 1.9690748453140259, + "learning_rate": 7.793233829018262e-07, + "logits/chosen": -0.5430271625518799, + "logits/rejected": -0.5403288006782532, + "logps/chosen": -0.8244275450706482, + "logps/rejected": -0.9133931994438171, + "loss": 1.3306, + "rewards/accuracies": 0.5, + "rewards/chosen": -1.6488550901412964, + "rewards/margins": 0.17793115973472595, + "rewards/rejected": -1.8267863988876343, + "step": 180 + }, + { + "epoch": 0.3826891311529567, + "grad_norm": 2.9181363582611084, + "learning_rate": 7.762430375592688e-07, + "logits/chosen": -0.4843495786190033, + "logits/rejected": -0.47929176688194275, + "logps/chosen": -0.8097372055053711, + "logps/rejected": -0.8973760008811951, + "loss": 1.3283, + "rewards/accuracies": 0.5625, + "rewards/chosen": -1.6194744110107422, + "rewards/margins": 0.17527759075164795, + "rewards/rejected": -1.7947520017623901, + "step": 181 + }, + { + "epoch": 0.384803435744962, + "grad_norm": 4.227083683013916, + "learning_rate": 7.731475266939158e-07, + "logits/chosen": -0.5047686696052551, + "logits/rejected": -0.4921850264072418, + "logps/chosen": -0.875984787940979, + "logps/rejected": -1.0406755208969116, + "loss": 1.3169, + "rewards/accuracies": 0.5703125, + "rewards/chosen": -1.751969575881958, + "rewards/margins": 0.32938146591186523, + "rewards/rejected": -2.0813510417938232, + "step": 182 + }, + { + "epoch": 0.3869177403369673, + "grad_norm": 1.2871490716934204, + "learning_rate": 7.700370202468489e-07, + "logits/chosen": -0.5123783349990845, + "logits/rejected": -0.55179762840271, + "logps/chosen": -0.8869211077690125, + "logps/rejected": -1.1082773208618164, + "loss": 1.216, + "rewards/accuracies": 0.640625, + "rewards/chosen": -1.773842215538025, + "rewards/margins": 0.4427123963832855, + "rewards/rejected": -2.216554641723633, + "step": 183 + }, + { + "epoch": 0.3890320449289726, + "grad_norm": 1.3015679121017456, + "learning_rate": 7.669116889823954e-07, + "logits/chosen": -0.49182361364364624, + "logits/rejected": -0.5180585384368896, + "logps/chosen": -0.8816227912902832, + "logps/rejected": -0.9516821503639221, + "loss": 1.3449, + "rewards/accuracies": 0.5078125, + "rewards/chosen": -1.7632455825805664, + "rewards/margins": 0.14011862874031067, + "rewards/rejected": -1.9033643007278442, + "step": 184 + }, + { + "epoch": 0.3911463495209779, + "grad_norm": 4.280956268310547, + "learning_rate": 7.637717044787526e-07, + "logits/chosen": -0.5702117681503296, + "logits/rejected": -0.5475804209709167, + "logps/chosen": -0.9307697415351868, + "logps/rejected": -1.0322346687316895, + "loss": 1.3434, + "rewards/accuracies": 0.5546875, + "rewards/chosen": -1.8615394830703735, + "rewards/margins": 0.20292985439300537, + "rewards/rejected": -2.064469337463379, + "step": 185 + }, + { + "epoch": 0.39326065411298317, + "grad_norm": 1.3511455059051514, + "learning_rate": 7.606172391185699e-07, + "logits/chosen": -0.5466108322143555, + "logits/rejected": -0.551085352897644, + "logps/chosen": -1.0657893419265747, + "logps/rejected": -1.15786612033844, + "loss": 1.3549, + "rewards/accuracies": 0.4609375, + "rewards/chosen": -2.1315786838531494, + "rewards/margins": 0.18415334820747375, + "rewards/rejected": -2.31573224067688, + "step": 186 + }, + { + "epoch": 0.39537495870498846, + "grad_norm": 0.7001176476478577, + "learning_rate": 7.574484660794836e-07, + "logits/chosen": -0.4849010407924652, + "logits/rejected": -0.5057946443557739, + "logps/chosen": -1.0784757137298584, + "logps/rejected": -1.2035218477249146, + "loss": 1.3556, + "rewards/accuracies": 0.4921875, + "rewards/chosen": -2.156951427459717, + "rewards/margins": 0.25009211897850037, + "rewards/rejected": -2.407043695449829, + "step": 187 + }, + { + "epoch": 0.39748926329699374, + "grad_norm": 3.1405649185180664, + "learning_rate": 7.542655593246103e-07, + "logits/chosen": -0.5316596031188965, + "logits/rejected": -0.5658366680145264, + "logps/chosen": -1.0630009174346924, + "logps/rejected": -1.2867177724838257, + "loss": 1.2612, + "rewards/accuracies": 0.5703125, + "rewards/chosen": -2.1260018348693848, + "rewards/margins": 0.447433739900589, + "rewards/rejected": -2.5734355449676514, + "step": 188 + }, + { + "epoch": 0.39960356788899903, + "grad_norm": 2.142986297607422, + "learning_rate": 7.510686935929962e-07, + "logits/chosen": -0.5959028005599976, + "logits/rejected": -0.5836039781570435, + "logps/chosen": -1.111003041267395, + "logps/rejected": -1.1858208179473877, + "loss": 1.3958, + "rewards/accuracies": 0.5390625, + "rewards/chosen": -2.22200608253479, + "rewards/margins": 0.149635449051857, + "rewards/rejected": -2.3716416358947754, + "step": 189 + }, + { + "epoch": 0.40171787248100427, + "grad_norm": 1.9227335453033447, + "learning_rate": 7.478580443900246e-07, + "logits/chosen": -0.607532799243927, + "logits/rejected": -0.6102017760276794, + "logps/chosen": -1.3353261947631836, + "logps/rejected": -1.3975369930267334, + "loss": 1.457, + "rewards/accuracies": 0.5078125, + "rewards/chosen": -2.670652389526367, + "rewards/margins": 0.12442154437303543, + "rewards/rejected": -2.795073986053467, + "step": 190 + }, + { + "epoch": 0.40383217707300956, + "grad_norm": 0.8509105443954468, + "learning_rate": 7.446337879777802e-07, + "logits/chosen": -0.5903070569038391, + "logits/rejected": -0.5728173851966858, + "logps/chosen": -1.27094566822052, + "logps/rejected": -1.3024815320968628, + "loss": 1.4953, + "rewards/accuracies": 0.5, + "rewards/chosen": -2.54189133644104, + "rewards/margins": 0.06307169049978256, + "rewards/rejected": -2.6049630641937256, + "step": 191 + }, + { + "epoch": 0.40594648166501485, + "grad_norm": 1.1561088562011719, + "learning_rate": 7.413961013653725e-07, + "logits/chosen": -0.5578102469444275, + "logits/rejected": -0.5907329320907593, + "logps/chosen": -1.3817013502120972, + "logps/rejected": -1.419295072555542, + "loss": 1.4865, + "rewards/accuracies": 0.515625, + "rewards/chosen": -2.7634027004241943, + "rewards/margins": 0.07518734782934189, + "rewards/rejected": -2.838590145111084, + "step": 192 + }, + { + "epoch": 0.40806078625702014, + "grad_norm": 8.165387153625488, + "learning_rate": 7.381451622992183e-07, + "logits/chosen": -0.5213198661804199, + "logits/rejected": -0.5392848253250122, + "logps/chosen": -1.1798306703567505, + "logps/rejected": -1.2692899703979492, + "loss": 1.3971, + "rewards/accuracies": 0.53125, + "rewards/chosen": -2.359661340713501, + "rewards/margins": 0.17891867458820343, + "rewards/rejected": -2.5385799407958984, + "step": 193 + }, + { + "epoch": 0.4101750908490254, + "grad_norm": 1.2850884199142456, + "learning_rate": 7.348811492532839e-07, + "logits/chosen": -0.5382787585258484, + "logits/rejected": -0.5274642705917358, + "logps/chosen": -1.242587685585022, + "logps/rejected": -1.272438645362854, + "loss": 1.4795, + "rewards/accuracies": 0.4921875, + "rewards/chosen": -2.485175371170044, + "rewards/margins": 0.05970197170972824, + "rewards/rejected": -2.544877290725708, + "step": 194 + }, + { + "epoch": 0.4122893954410307, + "grad_norm": 4.910929203033447, + "learning_rate": 7.316042414192864e-07, + "logits/chosen": -0.6186666488647461, + "logits/rejected": -0.6255884170532227, + "logps/chosen": -1.1743704080581665, + "logps/rejected": -1.2720146179199219, + "loss": 1.4127, + "rewards/accuracies": 0.546875, + "rewards/chosen": -2.348740816116333, + "rewards/margins": 0.19528816640377045, + "rewards/rejected": -2.5440292358398438, + "step": 195 + }, + { + "epoch": 0.414403700033036, + "grad_norm": 4.270901203155518, + "learning_rate": 7.283146186968565e-07, + "logits/chosen": -0.5861366987228394, + "logits/rejected": -0.6005197763442993, + "logps/chosen": -1.2127022743225098, + "logps/rejected": -1.3036490678787231, + "loss": 1.4067, + "rewards/accuracies": 0.546875, + "rewards/chosen": -2.4254045486450195, + "rewards/margins": 0.18189355731010437, + "rewards/rejected": -2.6072981357574463, + "step": 196 + }, + { + "epoch": 0.4165180046250413, + "grad_norm": 0.3070116639137268, + "learning_rate": 7.250124616836622e-07, + "logits/chosen": -0.6026022434234619, + "logits/rejected": -0.5920048952102661, + "logps/chosen": -1.0706496238708496, + "logps/rejected": -1.2879594564437866, + "loss": 1.2465, + "rewards/accuracies": 0.6484375, + "rewards/chosen": -2.141299247741699, + "rewards/margins": 0.4346192479133606, + "rewards/rejected": -2.5759189128875732, + "step": 197 + }, + { + "epoch": 0.4186323092170466, + "grad_norm": 1.160252571105957, + "learning_rate": 7.216979516654943e-07, + "logits/chosen": -0.5808722376823425, + "logits/rejected": -0.5770124197006226, + "logps/chosen": -1.0426011085510254, + "logps/rejected": -1.1295092105865479, + "loss": 1.4244, + "rewards/accuracies": 0.53125, + "rewards/chosen": -2.085202217102051, + "rewards/margins": 0.1738162338733673, + "rewards/rejected": -2.2590184211730957, + "step": 198 + }, + { + "epoch": 0.4207466138090519, + "grad_norm": 4.6966471672058105, + "learning_rate": 7.183712706063132e-07, + "logits/chosen": -0.5958350896835327, + "logits/rejected": -0.6440161466598511, + "logps/chosen": -0.981076717376709, + "logps/rejected": -1.1257147789001465, + "loss": 1.3175, + "rewards/accuracies": 0.6015625, + "rewards/chosen": -1.962153434753418, + "rewards/margins": 0.28927627205848694, + "rewards/rejected": -2.251429557800293, + "step": 199 + }, + { + "epoch": 0.42286091840105716, + "grad_norm": 2.9395248889923096, + "learning_rate": 7.150326011382603e-07, + "logits/chosen": -0.5647889375686646, + "logits/rejected": -0.5762943625450134, + "logps/chosen": -0.8101261854171753, + "logps/rejected": -1.0001438856124878, + "loss": 1.2135, + "rewards/accuracies": 0.6875, + "rewards/chosen": -1.6202523708343506, + "rewards/margins": 0.38003528118133545, + "rewards/rejected": -2.0002877712249756, + "step": 200 + }, + { + "epoch": 0.42497522299306245, + "grad_norm": 1.2575147151947021, + "learning_rate": 7.116821265516306e-07, + "logits/chosen": -0.5834293961524963, + "logits/rejected": -0.5929508805274963, + "logps/chosen": -0.8768399953842163, + "logps/rejected": -1.0942046642303467, + "loss": 1.219, + "rewards/accuracies": 0.6171875, + "rewards/chosen": -1.7536799907684326, + "rewards/margins": 0.43472927808761597, + "rewards/rejected": -2.1884093284606934, + "step": 201 + }, + { + "epoch": 0.42708952758506774, + "grad_norm": 1.4035751819610596, + "learning_rate": 7.083200307848115e-07, + "logits/chosen": -0.5424078106880188, + "logits/rejected": -0.5316082239151001, + "logps/chosen": -0.8791903257369995, + "logps/rejected": -0.9323580265045166, + "loss": 1.3675, + "rewards/accuracies": 0.546875, + "rewards/chosen": -1.758380651473999, + "rewards/margins": 0.10633517056703568, + "rewards/rejected": -1.8647160530090332, + "step": 202 + }, + { + "epoch": 0.42920383217707303, + "grad_norm": 1.8622503280639648, + "learning_rate": 7.049464984141829e-07, + "logits/chosen": -0.5329294204711914, + "logits/rejected": -0.5523126721382141, + "logps/chosen": -0.695776104927063, + "logps/rejected": -0.8400713801383972, + "loss": 1.2285, + "rewards/accuracies": 0.6328125, + "rewards/chosen": -1.391552209854126, + "rewards/margins": 0.28859058022499084, + "rewards/rejected": -1.6801427602767944, + "step": 203 + }, + { + "epoch": 0.4313181367690783, + "grad_norm": 0.8603182435035706, + "learning_rate": 7.015617146439861e-07, + "logits/chosen": -0.4516752064228058, + "logits/rejected": -0.46907976269721985, + "logps/chosen": -0.6868133544921875, + "logps/rejected": -0.8646677732467651, + "loss": 1.2417, + "rewards/accuracies": 0.65625, + "rewards/chosen": -1.373626708984375, + "rewards/margins": 0.355709046125412, + "rewards/rejected": -1.7293355464935303, + "step": 204 + }, + { + "epoch": 0.43343244136108355, + "grad_norm": 0.6437748670578003, + "learning_rate": 6.981658652961546e-07, + "logits/chosen": -0.6159051656723022, + "logits/rejected": -0.6000130772590637, + "logps/chosen": -0.7715178728103638, + "logps/rejected": -0.8714219331741333, + "loss": 1.3469, + "rewards/accuracies": 0.5703125, + "rewards/chosen": -1.5430357456207275, + "rewards/margins": 0.19980813562870026, + "rewards/rejected": -1.7428438663482666, + "step": 205 + }, + { + "epoch": 0.43554674595308884, + "grad_norm": 1.2309322357177734, + "learning_rate": 6.947591368001137e-07, + "logits/chosen": -0.5913614630699158, + "logits/rejected": -0.6128537654876709, + "logps/chosen": -0.7512561678886414, + "logps/rejected": -0.8872793912887573, + "loss": 1.26, + "rewards/accuracies": 0.578125, + "rewards/chosen": -1.5025123357772827, + "rewards/margins": 0.2720465660095215, + "rewards/rejected": -1.7745587825775146, + "step": 206 + }, + { + "epoch": 0.43766105054509413, + "grad_norm": 0.6153685450553894, + "learning_rate": 6.913417161825449e-07, + "logits/chosen": -0.5976595878601074, + "logits/rejected": -0.6222202181816101, + "logps/chosen": -0.837669849395752, + "logps/rejected": -0.9835771918296814, + "loss": 1.2986, + "rewards/accuracies": 0.5859375, + "rewards/chosen": -1.675339698791504, + "rewards/margins": 0.2918146252632141, + "rewards/rejected": -1.9671543836593628, + "step": 207 + }, + { + "epoch": 0.4397753551370994, + "grad_norm": 1.9922760725021362, + "learning_rate": 6.87913791057119e-07, + "logits/chosen": -0.6808818578720093, + "logits/rejected": -0.6692708730697632, + "logps/chosen": -0.7088961601257324, + "logps/rejected": -0.8256410360336304, + "loss": 1.281, + "rewards/accuracies": 0.6015625, + "rewards/chosen": -1.4177923202514648, + "rewards/margins": 0.23348984122276306, + "rewards/rejected": -1.6512820720672607, + "step": 208 + }, + { + "epoch": 0.4418896597291047, + "grad_norm": 1.9562067985534668, + "learning_rate": 6.844755496141961e-07, + "logits/chosen": -0.5282632112503052, + "logits/rejected": -0.5692226886749268, + "logps/chosen": -0.7235382795333862, + "logps/rejected": -0.801092803478241, + "loss": 1.3227, + "rewards/accuracies": 0.53125, + "rewards/chosen": -1.4470765590667725, + "rewards/margins": 0.1551089584827423, + "rewards/rejected": -1.602185606956482, + "step": 209 + }, + { + "epoch": 0.44400396432111, + "grad_norm": 0.8182584047317505, + "learning_rate": 6.81027180610493e-07, + "logits/chosen": -0.6418904662132263, + "logits/rejected": -0.5941328406333923, + "logps/chosen": -0.820648729801178, + "logps/rejected": -0.8864803910255432, + "loss": 1.3498, + "rewards/accuracies": 0.625, + "rewards/chosen": -1.641297459602356, + "rewards/margins": 0.13166317343711853, + "rewards/rejected": -1.7729607820510864, + "step": 210 + }, + { + "epoch": 0.4461182689131153, + "grad_norm": 3.075260877609253, + "learning_rate": 6.775688733587227e-07, + "logits/chosen": -0.5926809906959534, + "logits/rejected": -0.5844541788101196, + "logps/chosen": -0.7822425365447998, + "logps/rejected": -0.8866626024246216, + "loss": 1.2884, + "rewards/accuracies": 0.5859375, + "rewards/chosen": -1.5644850730895996, + "rewards/margins": 0.20884013175964355, + "rewards/rejected": -1.7733252048492432, + "step": 211 + }, + { + "epoch": 0.4482325735051206, + "grad_norm": 0.8032744526863098, + "learning_rate": 6.741008177171993e-07, + "logits/chosen": -0.579971432685852, + "logits/rejected": -0.5978566408157349, + "logps/chosen": -0.721234917640686, + "logps/rejected": -0.8368514180183411, + "loss": 1.2781, + "rewards/accuracies": 0.6171875, + "rewards/chosen": -1.442469835281372, + "rewards/margins": 0.23123310506343842, + "rewards/rejected": -1.6737028360366821, + "step": 212 + }, + { + "epoch": 0.45034687809712587, + "grad_norm": 0.6680911779403687, + "learning_rate": 6.706232040794161e-07, + "logits/chosen": -0.6748596429824829, + "logits/rejected": -0.6615546941757202, + "logps/chosen": -0.7931480407714844, + "logps/rejected": -0.8879257440567017, + "loss": 1.337, + "rewards/accuracies": 0.546875, + "rewards/chosen": -1.5862960815429688, + "rewards/margins": 0.1895553171634674, + "rewards/rejected": -1.7758514881134033, + "step": 213 + }, + { + "epoch": 0.45246118268913116, + "grad_norm": 2.5107688903808594, + "learning_rate": 6.671362233635925e-07, + "logits/chosen": -0.6460363268852234, + "logits/rejected": -0.6273557543754578, + "logps/chosen": -0.823783814907074, + "logps/rejected": -0.87412428855896, + "loss": 1.3756, + "rewards/accuracies": 0.53125, + "rewards/chosen": -1.647567629814148, + "rewards/margins": 0.10068092495203018, + "rewards/rejected": -1.74824857711792, + "step": 214 + }, + { + "epoch": 0.45457548728113645, + "grad_norm": 2.2206740379333496, + "learning_rate": 6.636400670021933e-07, + "logits/chosen": -0.6295229196548462, + "logits/rejected": -0.6330893039703369, + "logps/chosen": -0.807812511920929, + "logps/rejected": -0.9784457683563232, + "loss": 1.2259, + "rewards/accuracies": 0.6484375, + "rewards/chosen": -1.615625023841858, + "rewards/margins": 0.3412665128707886, + "rewards/rejected": -1.9568915367126465, + "step": 215 + }, + { + "epoch": 0.45668979187314174, + "grad_norm": 1.2925803661346436, + "learning_rate": 6.601349269314187e-07, + "logits/chosen": -0.6001027822494507, + "logits/rejected": -0.6305864453315735, + "logps/chosen": -0.7216315865516663, + "logps/rejected": -0.8616191744804382, + "loss": 1.269, + "rewards/accuracies": 0.65625, + "rewards/chosen": -1.4432631731033325, + "rewards/margins": 0.2799749970436096, + "rewards/rejected": -1.7232383489608765, + "step": 216 + }, + { + "epoch": 0.458804096465147, + "grad_norm": 4.863992214202881, + "learning_rate": 6.566209955806679e-07, + "logits/chosen": -0.5307935476303101, + "logits/rejected": -0.5385264754295349, + "logps/chosen": -0.8053566813468933, + "logps/rejected": -0.9241464734077454, + "loss": 1.3325, + "rewards/accuracies": 0.546875, + "rewards/chosen": -1.6107133626937866, + "rewards/margins": 0.23757943511009216, + "rewards/rejected": -1.8482929468154907, + "step": 217 + }, + { + "epoch": 0.4609184010571523, + "grad_norm": 1.0189604759216309, + "learning_rate": 6.530984658619733e-07, + "logits/chosen": -0.7031885385513306, + "logits/rejected": -0.7072005867958069, + "logps/chosen": -0.8382629752159119, + "logps/rejected": -0.9468755722045898, + "loss": 1.3276, + "rewards/accuracies": 0.5546875, + "rewards/chosen": -1.6765259504318237, + "rewards/margins": 0.21722503006458282, + "rewards/rejected": -1.8937511444091797, + "step": 218 + }, + { + "epoch": 0.4630327056491576, + "grad_norm": 1.1178699731826782, + "learning_rate": 6.495675311594122e-07, + "logits/chosen": -0.5736142992973328, + "logits/rejected": -0.5926069021224976, + "logps/chosen": -0.7676032781600952, + "logps/rejected": -0.9179919958114624, + "loss": 1.278, + "rewards/accuracies": 0.6015625, + "rewards/chosen": -1.5352065563201904, + "rewards/margins": 0.3007773756980896, + "rewards/rejected": -1.8359839916229248, + "step": 219 + }, + { + "epoch": 0.4651470102411629, + "grad_norm": 2.4985287189483643, + "learning_rate": 6.460283853184879e-07, + "logits/chosen": -0.6372602581977844, + "logits/rejected": -0.6313104033470154, + "logps/chosen": -0.8754556179046631, + "logps/rejected": -0.9803894758224487, + "loss": 1.3166, + "rewards/accuracies": 0.6328125, + "rewards/chosen": -1.7509112358093262, + "rewards/margins": 0.2098677009344101, + "rewards/rejected": -1.9607789516448975, + "step": 220 + }, + { + "epoch": 0.46726131483316813, + "grad_norm": 1.5675435066223145, + "learning_rate": 6.424812226354889e-07, + "logits/chosen": -0.6377983093261719, + "logits/rejected": -0.6666730642318726, + "logps/chosen": -0.7556843757629395, + "logps/rejected": -0.9096466302871704, + "loss": 1.2397, + "rewards/accuracies": 0.6796875, + "rewards/chosen": -1.511368751525879, + "rewards/margins": 0.30792441964149475, + "rewards/rejected": -1.8192932605743408, + "step": 221 + }, + { + "epoch": 0.4693756194251734, + "grad_norm": 2.853426218032837, + "learning_rate": 6.389262378468219e-07, + "logits/chosen": -0.6055567860603333, + "logits/rejected": -0.612144947052002, + "logps/chosen": -0.8588352203369141, + "logps/rejected": -0.8928595185279846, + "loss": 1.4022, + "rewards/accuracies": 0.5078125, + "rewards/chosen": -1.7176704406738281, + "rewards/margins": 0.06804870069026947, + "rewards/rejected": -1.7857190370559692, + "step": 222 + }, + { + "epoch": 0.4714899240171787, + "grad_norm": 0.528042733669281, + "learning_rate": 6.353636261183213e-07, + "logits/chosen": -0.6543641090393066, + "logits/rejected": -0.6635830402374268, + "logps/chosen": -0.7858147621154785, + "logps/rejected": -0.9400445222854614, + "loss": 1.2446, + "rewards/accuracies": 0.640625, + "rewards/chosen": -1.571629524230957, + "rewards/margins": 0.3084595203399658, + "rewards/rejected": -1.8800890445709229, + "step": 223 + }, + { + "epoch": 0.473604228609184, + "grad_norm": 1.1155768632888794, + "learning_rate": 6.317935830345338e-07, + "logits/chosen": -0.5700349807739258, + "logits/rejected": -0.6560614705085754, + "logps/chosen": -0.8426170945167542, + "logps/rejected": -0.9983471035957336, + "loss": 1.3204, + "rewards/accuracies": 0.578125, + "rewards/chosen": -1.6852341890335083, + "rewards/margins": 0.3114599883556366, + "rewards/rejected": -1.9966942071914673, + "step": 224 + }, + { + "epoch": 0.4757185332011893, + "grad_norm": 0.802669107913971, + "learning_rate": 6.282163045879823e-07, + "logits/chosen": -0.6912901401519775, + "logits/rejected": -0.7201069593429565, + "logps/chosen": -0.8135342597961426, + "logps/rejected": -0.9537283182144165, + "loss": 1.2961, + "rewards/accuracies": 0.5859375, + "rewards/chosen": -1.6270685195922852, + "rewards/margins": 0.2803882658481598, + "rewards/rejected": -1.907456636428833, + "step": 225 + }, + { + "epoch": 0.4778328377931946, + "grad_norm": 1.709757924079895, + "learning_rate": 6.246319871684047e-07, + "logits/chosen": -0.7573816776275635, + "logits/rejected": -0.8028420209884644, + "logps/chosen": -0.891952633857727, + "logps/rejected": -1.0168029069900513, + "loss": 1.333, + "rewards/accuracies": 0.6015625, + "rewards/chosen": -1.783905267715454, + "rewards/margins": 0.24970072507858276, + "rewards/rejected": -2.0336058139801025, + "step": 226 + }, + { + "epoch": 0.47994714238519987, + "grad_norm": 2.170957326889038, + "learning_rate": 6.210408275519734e-07, + "logits/chosen": -0.6915597915649414, + "logits/rejected": -0.7027997970581055, + "logps/chosen": -0.9063036441802979, + "logps/rejected": -1.0104373693466187, + "loss": 1.3388, + "rewards/accuracies": 0.5390625, + "rewards/chosen": -1.8126072883605957, + "rewards/margins": 0.20826762914657593, + "rewards/rejected": -2.0208747386932373, + "step": 227 + }, + { + "epoch": 0.48206144697720515, + "grad_norm": 1.8802261352539062, + "learning_rate": 6.174430228904919e-07, + "logits/chosen": -0.689726710319519, + "logits/rejected": -0.7143282890319824, + "logps/chosen": -0.7480812072753906, + "logps/rejected": -0.8698041439056396, + "loss": 1.2836, + "rewards/accuracies": 0.6171875, + "rewards/chosen": -1.4961624145507812, + "rewards/margins": 0.24344584345817566, + "rewards/rejected": -1.7396082878112793, + "step": 228 + }, + { + "epoch": 0.48417575156921044, + "grad_norm": 2.5202934741973877, + "learning_rate": 6.13838770700571e-07, + "logits/chosen": -0.6858299374580383, + "logits/rejected": -0.7115206122398376, + "logps/chosen": -0.8575515151023865, + "logps/rejected": -0.9657347202301025, + "loss": 1.3046, + "rewards/accuracies": 0.546875, + "rewards/chosen": -1.715103030204773, + "rewards/margins": 0.21636635065078735, + "rewards/rejected": -1.931469440460205, + "step": 229 + }, + { + "epoch": 0.48629005616121573, + "grad_norm": 1.268512487411499, + "learning_rate": 6.102282688527859e-07, + "logits/chosen": -0.7078689932823181, + "logits/rejected": -0.7254161238670349, + "logps/chosen": -0.8850880861282349, + "logps/rejected": -1.031385898590088, + "loss": 1.2816, + "rewards/accuracies": 0.625, + "rewards/chosen": -1.7701761722564697, + "rewards/margins": 0.29259535670280457, + "rewards/rejected": -2.062771797180176, + "step": 230 + }, + { + "epoch": 0.488404360753221, + "grad_norm": 1.7285584211349487, + "learning_rate": 6.066117155608135e-07, + "logits/chosen": -0.7325868606567383, + "logits/rejected": -0.7433226108551025, + "logps/chosen": -0.8014956116676331, + "logps/rejected": -0.9653260111808777, + "loss": 1.2429, + "rewards/accuracies": 0.6328125, + "rewards/chosen": -1.6029912233352661, + "rewards/margins": 0.32766085863113403, + "rewards/rejected": -1.9306520223617554, + "step": 231 + }, + { + "epoch": 0.4905186653452263, + "grad_norm": 0.6270304322242737, + "learning_rate": 6.029893093705491e-07, + "logits/chosen": -0.692166805267334, + "logits/rejected": -0.6799293756484985, + "logps/chosen": -0.7850213646888733, + "logps/rejected": -0.8839574456214905, + "loss": 1.2967, + "rewards/accuracies": 0.640625, + "rewards/chosen": -1.5700427293777466, + "rewards/margins": 0.19787229597568512, + "rewards/rejected": -1.767914891242981, + "step": 232 + }, + { + "epoch": 0.4926329699372316, + "grad_norm": 1.0160484313964844, + "learning_rate": 5.993612491492087e-07, + "logits/chosen": -0.7095844149589539, + "logits/rejected": -0.71524578332901, + "logps/chosen": -0.7063854336738586, + "logps/rejected": -0.8855549097061157, + "loss": 1.2176, + "rewards/accuracies": 0.6328125, + "rewards/chosen": -1.4127708673477173, + "rewards/margins": 0.3583390712738037, + "rewards/rejected": -1.7711098194122314, + "step": 233 + }, + { + "epoch": 0.4947472745292369, + "grad_norm": 2.225841999053955, + "learning_rate": 5.957277340744094e-07, + "logits/chosen": -0.7488946318626404, + "logits/rejected": -0.7588428854942322, + "logps/chosen": -0.9203822612762451, + "logps/rejected": -1.0089298486709595, + "loss": 1.355, + "rewards/accuracies": 0.5390625, + "rewards/chosen": -1.8407645225524902, + "rewards/margins": 0.17709502577781677, + "rewards/rejected": -2.017859697341919, + "step": 234 + }, + { + "epoch": 0.4968615791212422, + "grad_norm": 1.9577795267105103, + "learning_rate": 5.920889636232351e-07, + "logits/chosen": -0.8078997731208801, + "logits/rejected": -0.8064825534820557, + "logps/chosen": -0.8004480004310608, + "logps/rejected": -0.9856831431388855, + "loss": 1.2273, + "rewards/accuracies": 0.6640625, + "rewards/chosen": -1.6008960008621216, + "rewards/margins": 0.3704703152179718, + "rewards/rejected": -1.971366286277771, + "step": 235 + }, + { + "epoch": 0.4989758837132474, + "grad_norm": 2.5050246715545654, + "learning_rate": 5.884451375612865e-07, + "logits/chosen": -0.7499472498893738, + "logits/rejected": -0.7421904802322388, + "logps/chosen": -0.8363584876060486, + "logps/rejected": -0.9543781876564026, + "loss": 1.3002, + "rewards/accuracies": 0.5390625, + "rewards/chosen": -1.6727169752120972, + "rewards/margins": 0.23603934049606323, + "rewards/rejected": -1.9087563753128052, + "step": 236 + }, + { + "epoch": 0.5010901883052528, + "grad_norm": 0.585436224937439, + "learning_rate": 5.847964559317128e-07, + "logits/chosen": -0.730015218257904, + "logits/rejected": -0.7154791355133057, + "logps/chosen": -0.8828849196434021, + "logps/rejected": -0.9897070527076721, + "loss": 1.347, + "rewards/accuracies": 0.578125, + "rewards/chosen": -1.7657698392868042, + "rewards/margins": 0.21364440023899078, + "rewards/rejected": -1.9794141054153442, + "step": 237 + }, + { + "epoch": 0.503204492897258, + "grad_norm": 0.9204092621803284, + "learning_rate": 5.8114311904423e-07, + "logits/chosen": -0.759974479675293, + "logits/rejected": -0.7793674468994141, + "logps/chosen": -0.8321584463119507, + "logps/rejected": -1.0809751749038696, + "loss": 1.2185, + "rewards/accuracies": 0.6875, + "rewards/chosen": -1.6643168926239014, + "rewards/margins": 0.4976334273815155, + "rewards/rejected": -2.1619503498077393, + "step": 238 + }, + { + "epoch": 0.5053187974892633, + "grad_norm": 5.147011756896973, + "learning_rate": 5.774853274641243e-07, + "logits/chosen": -0.7148956060409546, + "logits/rejected": -0.7363921403884888, + "logps/chosen": -0.8623124361038208, + "logps/rejected": -1.0681498050689697, + "loss": 1.2353, + "rewards/accuracies": 0.6484375, + "rewards/chosen": -1.7246248722076416, + "rewards/margins": 0.4116746187210083, + "rewards/rejected": -2.1362996101379395, + "step": 239 + }, + { + "epoch": 0.5074331020812686, + "grad_norm": 1.9065529108047485, + "learning_rate": 5.738232820012407e-07, + "logits/chosen": -0.7158540487289429, + "logits/rejected": -0.7083900570869446, + "logps/chosen": -0.981558620929718, + "logps/rejected": -1.054612636566162, + "loss": 1.3594, + "rewards/accuracies": 0.59375, + "rewards/chosen": -1.963117241859436, + "rewards/margins": 0.14610806107521057, + "rewards/rejected": -2.109225273132324, + "step": 240 + }, + { + "epoch": 0.5095474066732739, + "grad_norm": 2.4411256313323975, + "learning_rate": 5.701571836989591e-07, + "logits/chosen": -0.8441444039344788, + "logits/rejected": -0.8529233336448669, + "logps/chosen": -0.8665949702262878, + "logps/rejected": -1.030572772026062, + "loss": 1.2477, + "rewards/accuracies": 0.671875, + "rewards/chosen": -1.7331899404525757, + "rewards/margins": 0.3279556334018707, + "rewards/rejected": -2.061145544052124, + "step": 241 + }, + { + "epoch": 0.5116617112652792, + "grad_norm": 2.461113214492798, + "learning_rate": 5.664872338231571e-07, + "logits/chosen": -0.7463312149047852, + "logits/rejected": -0.7725105285644531, + "logps/chosen": -0.9185941815376282, + "logps/rejected": -1.1244423389434814, + "loss": 1.2404, + "rewards/accuracies": 0.6328125, + "rewards/chosen": -1.8371883630752563, + "rewards/margins": 0.411696195602417, + "rewards/rejected": -2.248884677886963, + "step": 242 + }, + { + "epoch": 0.5137760158572844, + "grad_norm": 3.5861761569976807, + "learning_rate": 5.628136338511607e-07, + "logits/chosen": -0.8432914018630981, + "logits/rejected": -0.85801100730896, + "logps/chosen": -0.8873915672302246, + "logps/rejected": -1.0090795755386353, + "loss": 1.3072, + "rewards/accuracies": 0.609375, + "rewards/chosen": -1.7747831344604492, + "rewards/margins": 0.24337637424468994, + "rewards/rejected": -2.0181591510772705, + "step": 243 + }, + { + "epoch": 0.5158903204492897, + "grad_norm": 2.109071969985962, + "learning_rate": 5.591365854606829e-07, + "logits/chosen": -0.7899532318115234, + "logits/rejected": -0.7548331618309021, + "logps/chosen": -0.9333330392837524, + "logps/rejected": -1.00949227809906, + "loss": 1.3749, + "rewards/accuracies": 0.59375, + "rewards/chosen": -1.8666660785675049, + "rewards/margins": 0.1523183286190033, + "rewards/rejected": -2.01898455619812, + "step": 244 + }, + { + "epoch": 0.518004625041295, + "grad_norm": 2.2017955780029297, + "learning_rate": 5.554562905187527e-07, + "logits/chosen": -0.7569047212600708, + "logits/rejected": -0.7679808735847473, + "logps/chosen": -0.9779613614082336, + "logps/rejected": -1.1713427305221558, + "loss": 1.2628, + "rewards/accuracies": 0.609375, + "rewards/chosen": -1.9559227228164673, + "rewards/margins": 0.3867628276348114, + "rewards/rejected": -2.3426854610443115, + "step": 245 + }, + { + "epoch": 0.5201189296333003, + "grad_norm": 4.651991367340088, + "learning_rate": 5.517729510706315e-07, + "logits/chosen": -0.8546395301818848, + "logits/rejected": -0.8609369397163391, + "logps/chosen": -0.9926605224609375, + "logps/rejected": -1.1553713083267212, + "loss": 1.2812, + "rewards/accuracies": 0.6171875, + "rewards/chosen": -1.985321044921875, + "rewards/margins": 0.32542160153388977, + "rewards/rejected": -2.3107426166534424, + "step": 246 + }, + { + "epoch": 0.5222332342253055, + "grad_norm": 2.6384060382843018, + "learning_rate": 5.480867693287223e-07, + "logits/chosen": -0.7734386324882507, + "logits/rejected": -0.7963250875473022, + "logps/chosen": -0.8996341824531555, + "logps/rejected": -1.0466523170471191, + "loss": 1.2849, + "rewards/accuracies": 0.578125, + "rewards/chosen": -1.799268364906311, + "rewards/margins": 0.2940361201763153, + "rewards/rejected": -2.0933046340942383, + "step": 247 + }, + { + "epoch": 0.5243475388173109, + "grad_norm": 1.3608977794647217, + "learning_rate": 5.443979476614674e-07, + "logits/chosen": -0.7350472807884216, + "logits/rejected": -0.7215992212295532, + "logps/chosen": -0.8887076377868652, + "logps/rejected": -1.0147045850753784, + "loss": 1.3182, + "rewards/accuracies": 0.6171875, + "rewards/chosen": -1.7774152755737305, + "rewards/margins": 0.25199398398399353, + "rewards/rejected": -2.029409170150757, + "step": 248 + }, + { + "epoch": 0.5264618434093161, + "grad_norm": 3.017115354537964, + "learning_rate": 5.407066885822391e-07, + "logits/chosen": -0.827782154083252, + "logits/rejected": -0.8471929430961609, + "logps/chosen": -0.9262440800666809, + "logps/rejected": -1.1658306121826172, + "loss": 1.1882, + "rewards/accuracies": 0.6640625, + "rewards/chosen": -1.8524881601333618, + "rewards/margins": 0.47917306423187256, + "rewards/rejected": -2.3316612243652344, + "step": 249 + }, + { + "epoch": 0.5285761480013215, + "grad_norm": 0.7805312275886536, + "learning_rate": 5.370131947382214e-07, + "logits/chosen": -0.7815499305725098, + "logits/rejected": -0.8279274702072144, + "logps/chosen": -0.968708872795105, + "logps/rejected": -1.2697322368621826, + "loss": 1.2092, + "rewards/accuracies": 0.671875, + "rewards/chosen": -1.93741774559021, + "rewards/margins": 0.6020466685295105, + "rewards/rejected": -2.5394644737243652, + "step": 250 + }, + { + "epoch": 0.5306904525933267, + "grad_norm": 2.229363441467285, + "learning_rate": 5.333176688992855e-07, + "logits/chosen": -0.7824153900146484, + "logits/rejected": -0.8154900074005127, + "logps/chosen": -1.0211957693099976, + "logps/rejected": -1.2145965099334717, + "loss": 1.3074, + "rewards/accuracies": 0.609375, + "rewards/chosen": -2.042391538619995, + "rewards/margins": 0.3868010938167572, + "rewards/rejected": -2.4291930198669434, + "step": 251 + }, + { + "epoch": 0.532804757185332, + "grad_norm": 1.1359837055206299, + "learning_rate": 5.296203139468571e-07, + "logits/chosen": -0.7467613220214844, + "logits/rejected": -0.7548531889915466, + "logps/chosen": -1.0614902973175049, + "logps/rejected": -1.2674376964569092, + "loss": 1.2512, + "rewards/accuracies": 0.578125, + "rewards/chosen": -2.1229805946350098, + "rewards/margins": 0.4118950664997101, + "rewards/rejected": -2.5348753929138184, + "step": 252 + }, + { + "epoch": 0.5349190617773373, + "grad_norm": 3.0548548698425293, + "learning_rate": 5.259213328627792e-07, + "logits/chosen": -0.7868636250495911, + "logits/rejected": -0.8130850791931152, + "logps/chosen": -1.0743666887283325, + "logps/rejected": -1.2010191679000854, + "loss": 1.3275, + "rewards/accuracies": 0.578125, + "rewards/chosen": -2.148733377456665, + "rewards/margins": 0.2533051669597626, + "rewards/rejected": -2.402038335800171, + "step": 253 + }, + { + "epoch": 0.5370333663693426, + "grad_norm": 1.7205246686935425, + "learning_rate": 5.222209287181676e-07, + "logits/chosen": -0.81404709815979, + "logits/rejected": -0.8481613397598267, + "logps/chosen": -1.1599587202072144, + "logps/rejected": -1.4234716892242432, + "loss": 1.2894, + "rewards/accuracies": 0.6015625, + "rewards/chosen": -2.3199174404144287, + "rewards/margins": 0.5270256400108337, + "rewards/rejected": -2.8469433784484863, + "step": 254 + }, + { + "epoch": 0.5391476709613479, + "grad_norm": 2.2516112327575684, + "learning_rate": 5.185193046622634e-07, + "logits/chosen": -0.8112510442733765, + "logits/rejected": -0.8310728073120117, + "logps/chosen": -1.1263186931610107, + "logps/rejected": -1.3256827592849731, + "loss": 1.3552, + "rewards/accuracies": 0.5390625, + "rewards/chosen": -2.2526373863220215, + "rewards/margins": 0.39872825145721436, + "rewards/rejected": -2.6513655185699463, + "step": 255 + }, + { + "epoch": 0.5412619755533532, + "grad_norm": 2.8379359245300293, + "learning_rate": 5.148166639112799e-07, + "logits/chosen": -0.8202102184295654, + "logits/rejected": -0.845209002494812, + "logps/chosen": -1.264180302619934, + "logps/rejected": -1.6190590858459473, + "loss": 1.2083, + "rewards/accuracies": 0.625, + "rewards/chosen": -2.528360605239868, + "rewards/margins": 0.7097575068473816, + "rewards/rejected": -3.2381181716918945, + "step": 256 + }, + { + "epoch": 0.5433762801453584, + "grad_norm": 4.676355838775635, + "learning_rate": 5.111132097372459e-07, + "logits/chosen": -0.8866451978683472, + "logits/rejected": -0.8642281889915466, + "logps/chosen": -1.3194389343261719, + "logps/rejected": -1.4506916999816895, + "loss": 1.4002, + "rewards/accuracies": 0.59375, + "rewards/chosen": -2.6388778686523438, + "rewards/margins": 0.2625058591365814, + "rewards/rejected": -2.901383399963379, + "step": 257 + }, + { + "epoch": 0.5454905847373638, + "grad_norm": 2.55251407623291, + "learning_rate": 5.074091454568463e-07, + "logits/chosen": -0.7903708815574646, + "logits/rejected": -0.8010709881782532, + "logps/chosen": -1.3550961017608643, + "logps/rejected": -1.661428451538086, + "loss": 1.2131, + "rewards/accuracies": 0.65625, + "rewards/chosen": -2.7101922035217285, + "rewards/margins": 0.6126645803451538, + "rewards/rejected": -3.322856903076172, + "step": 258 + }, + { + "epoch": 0.547604889329369, + "grad_norm": 4.116572856903076, + "learning_rate": 5.037046744202611e-07, + "logits/chosen": -0.7501232624053955, + "logits/rejected": -0.7825176119804382, + "logps/chosen": -1.2111856937408447, + "logps/rejected": -1.5176191329956055, + "loss": 1.1345, + "rewards/accuracies": 0.71875, + "rewards/chosen": -2.4223713874816895, + "rewards/margins": 0.6128667593002319, + "rewards/rejected": -3.035238265991211, + "step": 259 + }, + { + "epoch": 0.5497191939213742, + "grad_norm": 2.0285205841064453, + "learning_rate": 5e-07, + "logits/chosen": -0.8355445861816406, + "logits/rejected": -0.8497716784477234, + "logps/chosen": -1.1876304149627686, + "logps/rejected": -1.4788450002670288, + "loss": 1.1559, + "rewards/accuracies": 0.671875, + "rewards/chosen": -2.375260829925537, + "rewards/margins": 0.5824294090270996, + "rewards/rejected": -2.9576900005340576, + "step": 260 + }, + { + "epoch": 0.5518334985133796, + "grad_norm": 4.681185245513916, + "learning_rate": 4.962953255797389e-07, + "logits/chosen": -0.8240503072738647, + "logits/rejected": -0.8016488552093506, + "logps/chosen": -1.2238959074020386, + "logps/rejected": -1.4727882146835327, + "loss": 1.2914, + "rewards/accuracies": 0.5546875, + "rewards/chosen": -2.447791814804077, + "rewards/margins": 0.49778467416763306, + "rewards/rejected": -2.9455764293670654, + "step": 261 + }, + { + "epoch": 0.5539478031053848, + "grad_norm": 5.15679931640625, + "learning_rate": 4.925908545431537e-07, + "logits/chosen": -0.728940486907959, + "logits/rejected": -0.7355924248695374, + "logps/chosen": -1.3356778621673584, + "logps/rejected": -1.6726096868515015, + "loss": 1.1434, + "rewards/accuracies": 0.6953125, + "rewards/chosen": -2.671355724334717, + "rewards/margins": 0.6738637685775757, + "rewards/rejected": -3.345219373703003, + "step": 262 + }, + { + "epoch": 0.5560621076973902, + "grad_norm": 2.481048345565796, + "learning_rate": 4.888867902627543e-07, + "logits/chosen": -0.8311591148376465, + "logits/rejected": -0.8191719055175781, + "logps/chosen": -1.2743335962295532, + "logps/rejected": -1.5339927673339844, + "loss": 1.2164, + "rewards/accuracies": 0.6640625, + "rewards/chosen": -2.5486671924591064, + "rewards/margins": 0.5193185210227966, + "rewards/rejected": -3.0679855346679688, + "step": 263 + }, + { + "epoch": 0.5581764122893954, + "grad_norm": 3.6758291721343994, + "learning_rate": 4.851833360887201e-07, + "logits/chosen": -0.6787989735603333, + "logits/rejected": -0.668928325176239, + "logps/chosen": -1.2278664112091064, + "logps/rejected": -1.4955706596374512, + "loss": 1.1942, + "rewards/accuracies": 0.7109375, + "rewards/chosen": -2.455732822418213, + "rewards/margins": 0.535408616065979, + "rewards/rejected": -2.9911413192749023, + "step": 264 + }, + { + "epoch": 0.5602907168814008, + "grad_norm": 2.7282023429870605, + "learning_rate": 4.814806953377365e-07, + "logits/chosen": -0.7772133350372314, + "logits/rejected": -0.7689889073371887, + "logps/chosen": -1.1954048871994019, + "logps/rejected": -1.444943904876709, + "loss": 1.2686, + "rewards/accuracies": 0.59375, + "rewards/chosen": -2.3908097743988037, + "rewards/margins": 0.4990782141685486, + "rewards/rejected": -2.889887809753418, + "step": 265 + }, + { + "epoch": 0.562405021473406, + "grad_norm": 2.8753116130828857, + "learning_rate": 4.777790712818323e-07, + "logits/chosen": -0.6946043968200684, + "logits/rejected": -0.7001516819000244, + "logps/chosen": -1.2844620943069458, + "logps/rejected": -1.486103892326355, + "loss": 1.284, + "rewards/accuracies": 0.5703125, + "rewards/chosen": -2.5689241886138916, + "rewards/margins": 0.4032836854457855, + "rewards/rejected": -2.97220778465271, + "step": 266 + }, + { + "epoch": 0.5645193260654113, + "grad_norm": 1.5583593845367432, + "learning_rate": 4.740786671372209e-07, + "logits/chosen": -0.7396820187568665, + "logits/rejected": -0.7129873037338257, + "logps/chosen": -1.410097599029541, + "logps/rejected": -1.6091456413269043, + "loss": 1.3158, + "rewards/accuracies": 0.640625, + "rewards/chosen": -2.820195198059082, + "rewards/margins": 0.3980959951877594, + "rewards/rejected": -3.2182912826538086, + "step": 267 + }, + { + "epoch": 0.5666336306574166, + "grad_norm": 3.5984952449798584, + "learning_rate": 4.703796860531429e-07, + "logits/chosen": -0.7031830549240112, + "logits/rejected": -0.700330376625061, + "logps/chosen": -1.633664608001709, + "logps/rejected": -1.9186874628067017, + "loss": 1.2479, + "rewards/accuracies": 0.6796875, + "rewards/chosen": -3.267329216003418, + "rewards/margins": 0.5700456500053406, + "rewards/rejected": -3.8373749256134033, + "step": 268 + }, + { + "epoch": 0.5687479352494219, + "grad_norm": 6.295733451843262, + "learning_rate": 4.666823311007144e-07, + "logits/chosen": -0.8001950979232788, + "logits/rejected": -0.8042099475860596, + "logps/chosen": -1.5675832033157349, + "logps/rejected": -1.9247075319290161, + "loss": 1.1759, + "rewards/accuracies": 0.625, + "rewards/chosen": -3.1351664066314697, + "rewards/margins": 0.7142485976219177, + "rewards/rejected": -3.8494150638580322, + "step": 269 + }, + { + "epoch": 0.5708622398414271, + "grad_norm": 3.6349036693573, + "learning_rate": 4.6298680526177855e-07, + "logits/chosen": -0.8108068704605103, + "logits/rejected": -0.8030902147293091, + "logps/chosen": -1.8205997943878174, + "logps/rejected": -2.195197105407715, + "loss": 1.1864, + "rewards/accuracies": 0.6796875, + "rewards/chosen": -3.6411995887756348, + "rewards/margins": 0.7491948008537292, + "rewards/rejected": -4.39039421081543, + "step": 270 + }, + { + "epoch": 0.5729765444334325, + "grad_norm": 4.786395072937012, + "learning_rate": 4.59293311417761e-07, + "logits/chosen": -0.798182487487793, + "logits/rejected": -0.7736828327178955, + "logps/chosen": -1.8617057800292969, + "logps/rejected": -2.08984637260437, + "loss": 1.3947, + "rewards/accuracies": 0.6328125, + "rewards/chosen": -3.7234115600585938, + "rewards/margins": 0.4562810957431793, + "rewards/rejected": -4.17969274520874, + "step": 271 + }, + { + "epoch": 0.5750908490254377, + "grad_norm": 6.7946457862854, + "learning_rate": 4.556020523385326e-07, + "logits/chosen": -0.7530428171157837, + "logits/rejected": -0.7395590543746948, + "logps/chosen": -1.8709862232208252, + "logps/rejected": -2.3599390983581543, + "loss": 1.1025, + "rewards/accuracies": 0.6875, + "rewards/chosen": -3.7419724464416504, + "rewards/margins": 0.9779053926467896, + "rewards/rejected": -4.719878196716309, + "step": 272 + }, + { + "epoch": 0.5772051536174431, + "grad_norm": 4.877624988555908, + "learning_rate": 4.5191323067127773e-07, + "logits/chosen": -0.7732480764389038, + "logits/rejected": -0.7835702300071716, + "logps/chosen": -2.0340800285339355, + "logps/rejected": -2.330742835998535, + "loss": 1.3198, + "rewards/accuracies": 0.6328125, + "rewards/chosen": -4.068160057067871, + "rewards/margins": 0.5933258533477783, + "rewards/rejected": -4.66148567199707, + "step": 273 + }, + { + "epoch": 0.5793194582094483, + "grad_norm": 9.001680374145508, + "learning_rate": 4.482270489293685e-07, + "logits/chosen": -0.9062263369560242, + "logits/rejected": -0.9105854630470276, + "logps/chosen": -2.1364972591400146, + "logps/rejected": -2.4467523097991943, + "loss": 1.3464, + "rewards/accuracies": 0.6328125, + "rewards/chosen": -4.272994518280029, + "rewards/margins": 0.6205099821090698, + "rewards/rejected": -4.893504619598389, + "step": 274 + }, + { + "epoch": 0.5814337628014535, + "grad_norm": 2.811025619506836, + "learning_rate": 4.445437094812475e-07, + "logits/chosen": -0.8593579530715942, + "logits/rejected": -0.8343831896781921, + "logps/chosen": -2.452843189239502, + "logps/rejected": -2.7551848888397217, + "loss": 1.3536, + "rewards/accuracies": 0.5859375, + "rewards/chosen": -4.905686378479004, + "rewards/margins": 0.6046838760375977, + "rewards/rejected": -5.510369777679443, + "step": 275 + }, + { + "epoch": 0.5835480673934589, + "grad_norm": 2.2030158042907715, + "learning_rate": 4.4086341453931714e-07, + "logits/chosen": -0.8991417288780212, + "logits/rejected": -0.8766486644744873, + "logps/chosen": -2.30641508102417, + "logps/rejected": -2.7606654167175293, + "loss": 1.1708, + "rewards/accuracies": 0.6875, + "rewards/chosen": -4.61283016204834, + "rewards/margins": 0.9085015654563904, + "rewards/rejected": -5.521330833435059, + "step": 276 + }, + { + "epoch": 0.5856623719854641, + "grad_norm": 5.5185227394104, + "learning_rate": 4.371863661488393e-07, + "logits/chosen": -0.8738227486610413, + "logits/rejected": -0.8665530681610107, + "logps/chosen": -2.29125714302063, + "logps/rejected": -2.7014153003692627, + "loss": 1.1883, + "rewards/accuracies": 0.6484375, + "rewards/chosen": -4.58251428604126, + "rewards/margins": 0.8203167915344238, + "rewards/rejected": -5.402830600738525, + "step": 277 + }, + { + "epoch": 0.5877766765774695, + "grad_norm": 2.0779521465301514, + "learning_rate": 4.3351276617684285e-07, + "logits/chosen": -0.958415150642395, + "logits/rejected": -0.9585077166557312, + "logps/chosen": -2.4368410110473633, + "logps/rejected": -2.798506736755371, + "loss": 1.1749, + "rewards/accuracies": 0.6484375, + "rewards/chosen": -4.873682022094727, + "rewards/margins": 0.7233313322067261, + "rewards/rejected": -5.597013473510742, + "step": 278 + }, + { + "epoch": 0.5898909811694747, + "grad_norm": 2.884877920150757, + "learning_rate": 4.29842816301041e-07, + "logits/chosen": -0.9413051605224609, + "logits/rejected": -0.9224691987037659, + "logps/chosen": -2.485034942626953, + "logps/rejected": -2.911332368850708, + "loss": 1.2035, + "rewards/accuracies": 0.6171875, + "rewards/chosen": -4.970069885253906, + "rewards/margins": 0.8525944948196411, + "rewards/rejected": -5.822664737701416, + "step": 279 + }, + { + "epoch": 0.59200528576148, + "grad_norm": 5.203248500823975, + "learning_rate": 4.2617671799875944e-07, + "logits/chosen": -0.9359334111213684, + "logits/rejected": -0.9387660026550293, + "logps/chosen": -2.378349542617798, + "logps/rejected": -2.730886936187744, + "loss": 1.2253, + "rewards/accuracies": 0.7109375, + "rewards/chosen": -4.756699085235596, + "rewards/margins": 0.7050745487213135, + "rewards/rejected": -5.461773872375488, + "step": 280 + }, + { + "epoch": 0.5941195903534853, + "grad_norm": 6.818525314331055, + "learning_rate": 4.225146725358758e-07, + "logits/chosen": -0.8864554166793823, + "logits/rejected": -0.8813320398330688, + "logps/chosen": -2.4233975410461426, + "logps/rejected": -2.8188178539276123, + "loss": 1.2281, + "rewards/accuracies": 0.6640625, + "rewards/chosen": -4.846795082092285, + "rewards/margins": 0.7908411622047424, + "rewards/rejected": -5.637635707855225, + "step": 281 + }, + { + "epoch": 0.5962338949454906, + "grad_norm": 2.529154062271118, + "learning_rate": 4.1885688095577e-07, + "logits/chosen": -0.8420325517654419, + "logits/rejected": -0.8822402954101562, + "logps/chosen": -2.626488447189331, + "logps/rejected": -3.1887192726135254, + "loss": 1.0561, + "rewards/accuracies": 0.6953125, + "rewards/chosen": -5.252976894378662, + "rewards/margins": 1.1244611740112305, + "rewards/rejected": -6.377438545227051, + "step": 282 + }, + { + "epoch": 0.5983481995374959, + "grad_norm": 3.0739686489105225, + "learning_rate": 4.152035440682873e-07, + "logits/chosen": -0.8550993204116821, + "logits/rejected": -0.8528580665588379, + "logps/chosen": -2.6387887001037598, + "logps/rejected": -2.9952192306518555, + "loss": 1.3409, + "rewards/accuracies": 0.6015625, + "rewards/chosen": -5.2775774002075195, + "rewards/margins": 0.7128612399101257, + "rewards/rejected": -5.990438461303711, + "step": 283 + }, + { + "epoch": 0.6004625041295012, + "grad_norm": 3.6649062633514404, + "learning_rate": 4.1155486243871363e-07, + "logits/chosen": -0.8643282651901245, + "logits/rejected": -0.9175342321395874, + "logps/chosen": -2.929072618484497, + "logps/rejected": -3.105940580368042, + "loss": 1.5121, + "rewards/accuracies": 0.578125, + "rewards/chosen": -5.858145236968994, + "rewards/margins": 0.3537355065345764, + "rewards/rejected": -6.211881160736084, + "step": 284 + }, + { + "epoch": 0.6025768087215064, + "grad_norm": 2.5071723461151123, + "learning_rate": 4.0791103637676486e-07, + "logits/chosen": -0.8368631601333618, + "logits/rejected": -0.819808304309845, + "logps/chosen": -3.0672600269317627, + "logps/rejected": -3.4685003757476807, + "loss": 1.3236, + "rewards/accuracies": 0.65625, + "rewards/chosen": -6.134520053863525, + "rewards/margins": 0.8024805784225464, + "rewards/rejected": -6.937000751495361, + "step": 285 + }, + { + "epoch": 0.6046911133135118, + "grad_norm": 8.780280113220215, + "learning_rate": 4.042722659255906e-07, + "logits/chosen": -0.8249569535255432, + "logits/rejected": -0.8442113995552063, + "logps/chosen": -3.3199872970581055, + "logps/rejected": -3.7276291847229004, + "loss": 1.322, + "rewards/accuracies": 0.6796875, + "rewards/chosen": -6.639974594116211, + "rewards/margins": 0.8152831792831421, + "rewards/rejected": -7.455258369445801, + "step": 286 + }, + { + "epoch": 0.606805417905517, + "grad_norm": 3.4388678073883057, + "learning_rate": 4.006387508507914e-07, + "logits/chosen": -0.7224047780036926, + "logits/rejected": -0.7616450786590576, + "logps/chosen": -2.9411330223083496, + "logps/rejected": -3.32680082321167, + "loss": 1.2868, + "rewards/accuracies": 0.6640625, + "rewards/chosen": -5.882266044616699, + "rewards/margins": 0.7713361978530884, + "rewards/rejected": -6.65360164642334, + "step": 287 + }, + { + "epoch": 0.6089197224975224, + "grad_norm": 5.095273971557617, + "learning_rate": 3.970106906294509e-07, + "logits/chosen": -0.7394692897796631, + "logits/rejected": -0.7316830158233643, + "logps/chosen": -2.9902045726776123, + "logps/rejected": -3.469916820526123, + "loss": 1.1694, + "rewards/accuracies": 0.6953125, + "rewards/chosen": -5.980409145355225, + "rewards/margins": 0.9594244360923767, + "rewards/rejected": -6.939833641052246, + "step": 288 + }, + { + "epoch": 0.6110340270895276, + "grad_norm": 2.1398613452911377, + "learning_rate": 3.933882844391866e-07, + "logits/chosen": -0.8181312084197998, + "logits/rejected": -0.833306610584259, + "logps/chosen": -3.0137529373168945, + "logps/rejected": -3.4241840839385986, + "loss": 1.2453, + "rewards/accuracies": 0.609375, + "rewards/chosen": -6.027505874633789, + "rewards/margins": 0.8208625316619873, + "rewards/rejected": -6.848368167877197, + "step": 289 + }, + { + "epoch": 0.6131483316815328, + "grad_norm": 4.185284614562988, + "learning_rate": 3.89771731147214e-07, + "logits/chosen": -0.7805104851722717, + "logits/rejected": -0.8086984753608704, + "logps/chosen": -2.984957218170166, + "logps/rejected": -3.430112361907959, + "loss": 1.2671, + "rewards/accuracies": 0.5859375, + "rewards/chosen": -5.969914436340332, + "rewards/margins": 0.890310525894165, + "rewards/rejected": -6.860224723815918, + "step": 290 + }, + { + "epoch": 0.6152626362735382, + "grad_norm": 7.104829788208008, + "learning_rate": 3.861612292994292e-07, + "logits/chosen": -0.7788286209106445, + "logits/rejected": -0.8027424216270447, + "logps/chosen": -2.896563768386841, + "logps/rejected": -3.1082046031951904, + "loss": 1.4853, + "rewards/accuracies": 0.578125, + "rewards/chosen": -5.793127536773682, + "rewards/margins": 0.42328107357025146, + "rewards/rejected": -6.216409206390381, + "step": 291 + }, + { + "epoch": 0.6173769408655434, + "grad_norm": 3.795579433441162, + "learning_rate": 3.825569771095082e-07, + "logits/chosen": -0.8044757843017578, + "logits/rejected": -0.7828265428543091, + "logps/chosen": -2.8059256076812744, + "logps/rejected": -3.3121094703674316, + "loss": 1.1299, + "rewards/accuracies": 0.6875, + "rewards/chosen": -5.611851215362549, + "rewards/margins": 1.0123679637908936, + "rewards/rejected": -6.624218940734863, + "step": 292 + }, + { + "epoch": 0.6194912454575487, + "grad_norm": 4.486142158508301, + "learning_rate": 3.7895917244802655e-07, + "logits/chosen": -0.7511788606643677, + "logits/rejected": -0.7885503768920898, + "logps/chosen": -2.927251100540161, + "logps/rejected": -3.1605303287506104, + "loss": 1.426, + "rewards/accuracies": 0.6015625, + "rewards/chosen": -5.854502201080322, + "rewards/margins": 0.466558575630188, + "rewards/rejected": -6.321060657501221, + "step": 293 + }, + { + "epoch": 0.621605550049554, + "grad_norm": 3.3942787647247314, + "learning_rate": 3.753680128315952e-07, + "logits/chosen": -0.8230300545692444, + "logits/rejected": -0.8042524456977844, + "logps/chosen": -2.524353504180908, + "logps/rejected": -2.8687357902526855, + "loss": 1.2653, + "rewards/accuracies": 0.6640625, + "rewards/chosen": -5.048707008361816, + "rewards/margins": 0.6887640953063965, + "rewards/rejected": -5.737471580505371, + "step": 294 + }, + { + "epoch": 0.6237198546415593, + "grad_norm": 4.326812744140625, + "learning_rate": 3.717836954120178e-07, + "logits/chosen": -0.7763381004333496, + "logits/rejected": -0.7852378487586975, + "logps/chosen": -2.4861948490142822, + "logps/rejected": -2.8822267055511475, + "loss": 1.124, + "rewards/accuracies": 0.75, + "rewards/chosen": -4.9723896980285645, + "rewards/margins": 0.7920635938644409, + "rewards/rejected": -5.764453411102295, + "step": 295 + }, + { + "epoch": 0.6258341592335646, + "grad_norm": 3.886293649673462, + "learning_rate": 3.6820641696546627e-07, + "logits/chosen": -0.8350138664245605, + "logits/rejected": -0.8594292998313904, + "logps/chosen": -2.1301493644714355, + "logps/rejected": -2.3678014278411865, + "loss": 1.3532, + "rewards/accuracies": 0.5859375, + "rewards/chosen": -4.260298728942871, + "rewards/margins": 0.4753049314022064, + "rewards/rejected": -4.735602855682373, + "step": 296 + }, + { + "epoch": 0.6279484638255699, + "grad_norm": 1.9318888187408447, + "learning_rate": 3.6463637388167875e-07, + "logits/chosen": -0.812870979309082, + "logits/rejected": -0.8393633961677551, + "logps/chosen": -2.0607728958129883, + "logps/rejected": -2.4457521438598633, + "loss": 1.2317, + "rewards/accuracies": 0.609375, + "rewards/chosen": -4.121545791625977, + "rewards/margins": 0.76995849609375, + "rewards/rejected": -4.891504287719727, + "step": 297 + }, + { + "epoch": 0.6300627684175751, + "grad_norm": 2.731139898300171, + "learning_rate": 3.610737621531781e-07, + "logits/chosen": -0.7860711216926575, + "logits/rejected": -0.8006534576416016, + "logps/chosen": -1.9324530363082886, + "logps/rejected": -2.2838711738586426, + "loss": 1.2986, + "rewards/accuracies": 0.65625, + "rewards/chosen": -3.864906072616577, + "rewards/margins": 0.7028359174728394, + "rewards/rejected": -4.567742347717285, + "step": 298 + }, + { + "epoch": 0.6321770730095805, + "grad_norm": 3.118441581726074, + "learning_rate": 3.575187773645112e-07, + "logits/chosen": -0.6946629285812378, + "logits/rejected": -0.6832380294799805, + "logps/chosen": -2.2569775581359863, + "logps/rejected": -2.6153128147125244, + "loss": 1.2166, + "rewards/accuracies": 0.6171875, + "rewards/chosen": -4.513955116271973, + "rewards/margins": 0.7166703343391418, + "rewards/rejected": -5.230625629425049, + "step": 299 + }, + { + "epoch": 0.6342913776015857, + "grad_norm": 4.998100757598877, + "learning_rate": 3.5397161468151214e-07, + "logits/chosen": -0.7972643375396729, + "logits/rejected": -0.7864660620689392, + "logps/chosen": -2.227022886276245, + "logps/rejected": -2.57175350189209, + "loss": 1.2796, + "rewards/accuracies": 0.6328125, + "rewards/chosen": -4.45404577255249, + "rewards/margins": 0.6894608736038208, + "rewards/rejected": -5.14350700378418, + "step": 300 + }, + { + "epoch": 0.6364056821935911, + "grad_norm": 6.259451866149902, + "learning_rate": 3.5043246884058777e-07, + "logits/chosen": -0.6282143592834473, + "logits/rejected": -0.6314865350723267, + "logps/chosen": -2.4372308254241943, + "logps/rejected": -2.8582205772399902, + "loss": 1.1592, + "rewards/accuracies": 0.625, + "rewards/chosen": -4.874461650848389, + "rewards/margins": 0.8419792056083679, + "rewards/rejected": -5.7164411544799805, + "step": 301 + }, + { + "epoch": 0.6385199867855963, + "grad_norm": 2.577531337738037, + "learning_rate": 3.4690153413802653e-07, + "logits/chosen": -0.658220648765564, + "logits/rejected": -0.6330516934394836, + "logps/chosen": -2.6647050380706787, + "logps/rejected": -3.1917996406555176, + "loss": 1.2609, + "rewards/accuracies": 0.671875, + "rewards/chosen": -5.329410076141357, + "rewards/margins": 1.0541892051696777, + "rewards/rejected": -6.383599281311035, + "step": 302 + }, + { + "epoch": 0.6406342913776016, + "grad_norm": 4.733935356140137, + "learning_rate": 3.4337900441933227e-07, + "logits/chosen": -0.5048555731773376, + "logits/rejected": -0.45112305879592896, + "logps/chosen": -2.5193920135498047, + "logps/rejected": -3.1279971599578857, + "loss": 1.0648, + "rewards/accuracies": 0.765625, + "rewards/chosen": -5.038784027099609, + "rewards/margins": 1.2172104120254517, + "rewards/rejected": -6.2559943199157715, + "step": 303 + }, + { + "epoch": 0.6427485959696069, + "grad_norm": 5.54962158203125, + "learning_rate": 3.3986507306858125e-07, + "logits/chosen": -0.5305406451225281, + "logits/rejected": -0.5246613025665283, + "logps/chosen": -2.8851962089538574, + "logps/rejected": -3.248018264770508, + "loss": 1.4329, + "rewards/accuracies": 0.625, + "rewards/chosen": -5.770392417907715, + "rewards/margins": 0.7256444692611694, + "rewards/rejected": -6.496036529541016, + "step": 304 + }, + { + "epoch": 0.6448629005616121, + "grad_norm": 2.827944278717041, + "learning_rate": 3.363599329978066e-07, + "logits/chosen": -0.4795135259628296, + "logits/rejected": -0.4911767244338989, + "logps/chosen": -3.0268373489379883, + "logps/rejected": -3.4411511421203613, + "loss": 1.4083, + "rewards/accuracies": 0.65625, + "rewards/chosen": -6.053674697875977, + "rewards/margins": 0.8286278247833252, + "rewards/rejected": -6.882302284240723, + "step": 305 + }, + { + "epoch": 0.6469772051536175, + "grad_norm": 5.35672664642334, + "learning_rate": 3.328637766364075e-07, + "logits/chosen": -0.4823904037475586, + "logits/rejected": -0.48555058240890503, + "logps/chosen": -2.990793466567993, + "logps/rejected": -3.529240846633911, + "loss": 1.1417, + "rewards/accuracies": 0.6796875, + "rewards/chosen": -5.981586933135986, + "rewards/margins": 1.0768945217132568, + "rewards/rejected": -7.058481693267822, + "step": 306 + }, + { + "epoch": 0.6490915097456227, + "grad_norm": 2.8072359561920166, + "learning_rate": 3.2937679592058396e-07, + "logits/chosen": -0.4903571605682373, + "logits/rejected": -0.46411609649658203, + "logps/chosen": -2.8665530681610107, + "logps/rejected": -3.542123556137085, + "loss": 1.2485, + "rewards/accuracies": 0.6640625, + "rewards/chosen": -5.7331061363220215, + "rewards/margins": 1.3511409759521484, + "rewards/rejected": -7.08424711227417, + "step": 307 + }, + { + "epoch": 0.651205814337628, + "grad_norm": 6.341434478759766, + "learning_rate": 3.2589918228280066e-07, + "logits/chosen": -0.4496378004550934, + "logits/rejected": -0.35389459133148193, + "logps/chosen": -2.8208916187286377, + "logps/rejected": -3.326601505279541, + "loss": 1.3089, + "rewards/accuracies": 0.6171875, + "rewards/chosen": -5.641783237457275, + "rewards/margins": 1.011419653892517, + "rewards/rejected": -6.653203010559082, + "step": 308 + }, + { + "epoch": 0.6533201189296333, + "grad_norm": 2.5416784286499023, + "learning_rate": 3.2243112664127723e-07, + "logits/chosen": -0.44504135847091675, + "logits/rejected": -0.42088568210601807, + "logps/chosen": -2.7710533142089844, + "logps/rejected": -3.4406185150146484, + "loss": 1.2213, + "rewards/accuracies": 0.6640625, + "rewards/chosen": -5.542106628417969, + "rewards/margins": 1.3391309976577759, + "rewards/rejected": -6.881237030029297, + "step": 309 + }, + { + "epoch": 0.6554344235216386, + "grad_norm": 4.573229789733887, + "learning_rate": 3.189728193895069e-07, + "logits/chosen": -0.31100764870643616, + "logits/rejected": -0.32552966475486755, + "logps/chosen": -3.099289655685425, + "logps/rejected": -3.5152204036712646, + "loss": 1.3571, + "rewards/accuracies": 0.6015625, + "rewards/chosen": -6.19857931137085, + "rewards/margins": 0.8318620324134827, + "rewards/rejected": -7.030440807342529, + "step": 310 + }, + { + "epoch": 0.6575487281136438, + "grad_norm": 3.7587928771972656, + "learning_rate": 3.155244503858041e-07, + "logits/chosen": -0.4225979447364807, + "logits/rejected": -0.43882372975349426, + "logps/chosen": -2.9082608222961426, + "logps/rejected": -3.2239482402801514, + "loss": 1.3415, + "rewards/accuracies": 0.6953125, + "rewards/chosen": -5.816521644592285, + "rewards/margins": 0.6313749551773071, + "rewards/rejected": -6.447896480560303, + "step": 311 + }, + { + "epoch": 0.6596630327056492, + "grad_norm": 5.79728889465332, + "learning_rate": 3.12086208942881e-07, + "logits/chosen": -0.48076939582824707, + "logits/rejected": -0.41990721225738525, + "logps/chosen": -2.7089650630950928, + "logps/rejected": -3.29990291595459, + "loss": 1.1423, + "rewards/accuracies": 0.6640625, + "rewards/chosen": -5.4179301261901855, + "rewards/margins": 1.181876540184021, + "rewards/rejected": -6.59980583190918, + "step": 312 + }, + { + "epoch": 0.6617773372976544, + "grad_norm": 7.405224800109863, + "learning_rate": 3.086582838174551e-07, + "logits/chosen": -0.48003631830215454, + "logits/rejected": -0.40571871399879456, + "logps/chosen": -2.53741455078125, + "logps/rejected": -3.0145747661590576, + "loss": 1.3247, + "rewards/accuracies": 0.609375, + "rewards/chosen": -5.0748291015625, + "rewards/margins": 0.9543203115463257, + "rewards/rejected": -6.029149532318115, + "step": 313 + }, + { + "epoch": 0.6638916418896598, + "grad_norm": 6.371465682983398, + "learning_rate": 3.052408631998863e-07, + "logits/chosen": -0.42537638545036316, + "logits/rejected": -0.39384835958480835, + "logps/chosen": -3.006593942642212, + "logps/rejected": -3.4665465354919434, + "loss": 1.2648, + "rewards/accuracies": 0.6328125, + "rewards/chosen": -6.013187885284424, + "rewards/margins": 0.919904887676239, + "rewards/rejected": -6.933093070983887, + "step": 314 + }, + { + "epoch": 0.666005946481665, + "grad_norm": 4.65411376953125, + "learning_rate": 3.018341347038453e-07, + "logits/chosen": -0.38848310708999634, + "logits/rejected": -0.3435167670249939, + "logps/chosen": -2.9562084674835205, + "logps/rejected": -3.5491316318511963, + "loss": 1.1353, + "rewards/accuracies": 0.6796875, + "rewards/chosen": -5.912416934967041, + "rewards/margins": 1.1858452558517456, + "rewards/rejected": -7.098263263702393, + "step": 315 + }, + { + "epoch": 0.6681202510736703, + "grad_norm": 5.089771747589111, + "learning_rate": 2.9843828535601397e-07, + "logits/chosen": -0.3452882170677185, + "logits/rejected": -0.29303884506225586, + "logps/chosen": -2.5367987155914307, + "logps/rejected": -3.172724723815918, + "loss": 1.2002, + "rewards/accuracies": 0.6875, + "rewards/chosen": -5.073597431182861, + "rewards/margins": 1.2718524932861328, + "rewards/rejected": -6.345449447631836, + "step": 316 + }, + { + "epoch": 0.6702345556656756, + "grad_norm": 4.480255603790283, + "learning_rate": 2.9505350158581697e-07, + "logits/chosen": -0.47401517629623413, + "logits/rejected": -0.45950815081596375, + "logps/chosen": -2.45076322555542, + "logps/rejected": -2.998079299926758, + "loss": 1.2545, + "rewards/accuracies": 0.6171875, + "rewards/chosen": -4.90152645111084, + "rewards/margins": 1.0946320295333862, + "rewards/rejected": -5.996158599853516, + "step": 317 + }, + { + "epoch": 0.6723488602576809, + "grad_norm": 3.6318399906158447, + "learning_rate": 2.916799692151884e-07, + "logits/chosen": -0.20774951577186584, + "logits/rejected": -0.21114808320999146, + "logps/chosen": -2.8932981491088867, + "logps/rejected": -3.613022565841675, + "loss": 1.1187, + "rewards/accuracies": 0.6953125, + "rewards/chosen": -5.786596298217773, + "rewards/margins": 1.4394491910934448, + "rewards/rejected": -7.22604513168335, + "step": 318 + }, + { + "epoch": 0.6744631648496862, + "grad_norm": 6.601771831512451, + "learning_rate": 2.883178734483692e-07, + "logits/chosen": -0.3821495473384857, + "logits/rejected": -0.35181915760040283, + "logps/chosen": -2.5047662258148193, + "logps/rejected": -3.074918270111084, + "loss": 1.1545, + "rewards/accuracies": 0.71875, + "rewards/chosen": -5.009532451629639, + "rewards/margins": 1.1403042078018188, + "rewards/rejected": -6.149836540222168, + "step": 319 + }, + { + "epoch": 0.6765774694416914, + "grad_norm": 3.077775716781616, + "learning_rate": 2.849673988617399e-07, + "logits/chosen": -0.4517952799797058, + "logits/rejected": -0.3880998194217682, + "logps/chosen": -2.5404443740844727, + "logps/rejected": -3.007855176925659, + "loss": 1.2441, + "rewards/accuracies": 0.6484375, + "rewards/chosen": -5.080888748168945, + "rewards/margins": 0.9348208904266357, + "rewards/rejected": -6.015710353851318, + "step": 320 + } + ], + "logging_steps": 1, + "max_steps": 472, + "num_input_tokens_seen": 0, + "num_train_epochs": 1, + "save_steps": 64, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 0.0, + "train_batch_size": 2, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-384/config.json b/checkpoint-384/config.json new file mode 100644 index 0000000000000000000000000000000000000000..ead276a9e0bbd4a0baae360715d8e80853974eb6 --- /dev/null +++ b/checkpoint-384/config.json @@ -0,0 +1,40 @@ +{ + "_name_or_path": "/home/v-liuzichuan/huggingface/Llama-3.2-3B-Instruct", + "architectures": [ + "LlamaForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "bos_token_id": 128000, + "eos_token_id": [ + 128001, + 128008, + 128009 + ], + "head_dim": 128, + "hidden_act": "silu", + "hidden_size": 3072, + "initializer_range": 0.02, + "intermediate_size": 8192, + "max_position_embeddings": 131072, + "mlp_bias": false, + "model_type": "llama", + "num_attention_heads": 24, + "num_hidden_layers": 28, + "num_key_value_heads": 8, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": { + "factor": 32.0, + "high_freq_factor": 4.0, + "low_freq_factor": 1.0, + "original_max_position_embeddings": 8192, + "rope_type": "llama3" + }, + "rope_theta": 500000.0, + "tie_word_embeddings": true, + "torch_dtype": "bfloat16", + "transformers_version": "4.45.2", + "use_cache": false, + "vocab_size": 128256 +} diff --git a/checkpoint-384/generation_config.json b/checkpoint-384/generation_config.json new file mode 100644 index 0000000000000000000000000000000000000000..b18d16b4cc3c799dd78e854fd824ddf8a2996285 --- /dev/null +++ b/checkpoint-384/generation_config.json @@ -0,0 +1,12 @@ +{ + "bos_token_id": 128000, + "do_sample": true, + "eos_token_id": [ + 128001, + 128008, + 128009 + ], + "temperature": 0.6, + "top_p": 0.9, + "transformers_version": "4.45.2" +} diff --git a/checkpoint-384/model-00001-of-00002.safetensors b/checkpoint-384/model-00001-of-00002.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..f14841d5c2b1df8e8485c795e7ca50eb7d6a90ca --- /dev/null +++ b/checkpoint-384/model-00001-of-00002.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:66f96fba2e3a12095ad166f2a31423a83705f0fb74c2a73d3e7f23b12cac6101 +size 4965799096 diff --git a/checkpoint-384/model-00002-of-00002.safetensors b/checkpoint-384/model-00002-of-00002.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..050e166b0ff18ca0a7e6bc85b16bb01c335e7f1a --- /dev/null +++ b/checkpoint-384/model-00002-of-00002.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c4ec0f934d6bb118555445aacc4f9231ac4f8d4dd58d59741e01674bd2260194 +size 2247734992 diff --git a/checkpoint-384/model.safetensors.index.json b/checkpoint-384/model.safetensors.index.json new file mode 100644 index 0000000000000000000000000000000000000000..ed64de846d720b9a7859dc20575fea8e8ca51940 --- /dev/null +++ b/checkpoint-384/model.safetensors.index.json @@ -0,0 +1,262 @@ +{ + "metadata": { + "total_size": 7213504512 + }, + "weight_map": { + "lm_head.weight": "model-00002-of-00002.safetensors", + "model.embed_tokens.weight": "model-00001-of-00002.safetensors", + "model.layers.0.input_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.0.mlp.down_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.0.mlp.gate_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.0.mlp.up_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.0.post_attention_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.0.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.0.self_attn.o_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.0.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.0.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.1.input_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.1.mlp.down_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.1.mlp.gate_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.1.mlp.up_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.1.post_attention_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.1.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.1.self_attn.o_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.1.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.1.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.10.input_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.10.mlp.down_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.10.mlp.gate_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.10.mlp.up_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.10.post_attention_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.10.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.10.self_attn.o_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.10.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.10.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.11.input_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.11.mlp.down_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.11.mlp.gate_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.11.mlp.up_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.11.post_attention_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.11.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.11.self_attn.o_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.11.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.11.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.12.input_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.12.mlp.down_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.12.mlp.gate_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.12.mlp.up_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.12.post_attention_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.12.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.12.self_attn.o_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.12.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.12.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.13.input_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.13.mlp.down_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.13.mlp.gate_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.13.mlp.up_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.13.post_attention_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.13.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.13.self_attn.o_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.13.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.13.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.14.input_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.14.mlp.down_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.14.mlp.gate_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.14.mlp.up_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.14.post_attention_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.14.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.14.self_attn.o_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.14.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.14.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.15.input_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.15.mlp.down_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.15.mlp.gate_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.15.mlp.up_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.15.post_attention_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.15.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.15.self_attn.o_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.15.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.15.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.16.input_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.16.mlp.down_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.16.mlp.gate_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.16.mlp.up_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.16.post_attention_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.16.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.16.self_attn.o_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.16.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.16.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.17.input_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.17.mlp.down_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.17.mlp.gate_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.17.mlp.up_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.17.post_attention_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.17.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.17.self_attn.o_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.17.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.17.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.18.input_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.18.mlp.down_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.18.mlp.gate_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.18.mlp.up_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.18.post_attention_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.18.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.18.self_attn.o_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.18.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.18.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.19.input_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.19.mlp.down_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.19.mlp.gate_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.19.mlp.up_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.19.post_attention_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.19.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.19.self_attn.o_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.19.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.19.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.2.input_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.2.mlp.down_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.2.mlp.gate_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.2.mlp.up_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.2.post_attention_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.2.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.2.self_attn.o_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.2.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.2.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.20.input_layernorm.weight": "model-00002-of-00002.safetensors", + "model.layers.20.mlp.down_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.20.mlp.gate_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.20.mlp.up_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.20.post_attention_layernorm.weight": "model-00002-of-00002.safetensors", + "model.layers.20.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.20.self_attn.o_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.20.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.20.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.21.input_layernorm.weight": "model-00002-of-00002.safetensors", + "model.layers.21.mlp.down_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.21.mlp.gate_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.21.mlp.up_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.21.post_attention_layernorm.weight": "model-00002-of-00002.safetensors", + "model.layers.21.self_attn.k_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.21.self_attn.o_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.21.self_attn.q_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.21.self_attn.v_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.22.input_layernorm.weight": "model-00002-of-00002.safetensors", + "model.layers.22.mlp.down_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.22.mlp.gate_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.22.mlp.up_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.22.post_attention_layernorm.weight": "model-00002-of-00002.safetensors", + "model.layers.22.self_attn.k_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.22.self_attn.o_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.22.self_attn.q_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.22.self_attn.v_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.23.input_layernorm.weight": "model-00002-of-00002.safetensors", + "model.layers.23.mlp.down_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.23.mlp.gate_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.23.mlp.up_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.23.post_attention_layernorm.weight": "model-00002-of-00002.safetensors", + "model.layers.23.self_attn.k_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.23.self_attn.o_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.23.self_attn.q_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.23.self_attn.v_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.24.input_layernorm.weight": "model-00002-of-00002.safetensors", + "model.layers.24.mlp.down_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.24.mlp.gate_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.24.mlp.up_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.24.post_attention_layernorm.weight": "model-00002-of-00002.safetensors", + "model.layers.24.self_attn.k_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.24.self_attn.o_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.24.self_attn.q_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.24.self_attn.v_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.25.input_layernorm.weight": "model-00002-of-00002.safetensors", + "model.layers.25.mlp.down_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.25.mlp.gate_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.25.mlp.up_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.25.post_attention_layernorm.weight": "model-00002-of-00002.safetensors", + "model.layers.25.self_attn.k_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.25.self_attn.o_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.25.self_attn.q_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.25.self_attn.v_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.26.input_layernorm.weight": "model-00002-of-00002.safetensors", + "model.layers.26.mlp.down_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.26.mlp.gate_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.26.mlp.up_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.26.post_attention_layernorm.weight": "model-00002-of-00002.safetensors", + "model.layers.26.self_attn.k_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.26.self_attn.o_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.26.self_attn.q_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.26.self_attn.v_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.27.input_layernorm.weight": "model-00002-of-00002.safetensors", + "model.layers.27.mlp.down_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.27.mlp.gate_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.27.mlp.up_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.27.post_attention_layernorm.weight": "model-00002-of-00002.safetensors", + "model.layers.27.self_attn.k_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.27.self_attn.o_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.27.self_attn.q_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.27.self_attn.v_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.3.input_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.3.mlp.down_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.3.mlp.gate_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.3.mlp.up_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.3.post_attention_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.3.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.3.self_attn.o_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.3.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.3.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.4.input_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.4.mlp.down_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.4.mlp.gate_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.4.mlp.up_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.4.post_attention_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.4.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.4.self_attn.o_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.4.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.4.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.5.input_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.5.mlp.down_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.5.mlp.gate_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.5.mlp.up_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.5.post_attention_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.5.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.5.self_attn.o_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.5.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.5.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.6.input_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.6.mlp.down_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.6.mlp.gate_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.6.mlp.up_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.6.post_attention_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.6.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.6.self_attn.o_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.6.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.6.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.7.input_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.7.mlp.down_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.7.mlp.gate_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.7.mlp.up_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.7.post_attention_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.7.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.7.self_attn.o_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.7.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.7.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.8.input_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.8.mlp.down_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.8.mlp.gate_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.8.mlp.up_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.8.post_attention_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.8.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.8.self_attn.o_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.8.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.8.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.9.input_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.9.mlp.down_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.9.mlp.gate_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.9.mlp.up_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.9.post_attention_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.9.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.9.self_attn.o_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.9.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.9.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.norm.weight": "model-00002-of-00002.safetensors" + } +} diff --git a/checkpoint-384/special_tokens_map.json b/checkpoint-384/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..b43be96621d147110fb8a18b5776ec6e38516127 --- /dev/null +++ b/checkpoint-384/special_tokens_map.json @@ -0,0 +1,17 @@ +{ + "bos_token": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": "<|eot_id|>" +} diff --git a/checkpoint-384/tokenizer.json b/checkpoint-384/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..1c1d8d5c9024994f1d3b00f9662b8dd89ca13cf2 --- /dev/null +++ b/checkpoint-384/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6b9e4e7fb171f92fd137b777cc2714bf87d11576700a1dcd7a399e7bbe39537b +size 17209920 diff --git a/checkpoint-384/tokenizer_config.json b/checkpoint-384/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..e3cdb7bfcc59fd293867d98e124a30b3ddf39c93 --- /dev/null +++ b/checkpoint-384/tokenizer_config.json @@ -0,0 +1,2063 @@ +{ + "added_tokens_decoder": { + "128000": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128001": { + "content": "<|end_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128002": { + "content": "<|reserved_special_token_0|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128003": { + "content": "<|reserved_special_token_1|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128004": { + "content": "<|finetune_right_pad_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128005": { + "content": "<|reserved_special_token_2|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128006": { + "content": "<|start_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128007": { + "content": "<|end_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128008": { + "content": "<|eom_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128009": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128010": { + "content": "<|python_tag|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128011": { + "content": "<|reserved_special_token_3|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128012": { + "content": "<|reserved_special_token_4|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128013": { + "content": "<|reserved_special_token_5|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128014": { + "content": "<|reserved_special_token_6|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128015": { + "content": "<|reserved_special_token_7|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128016": { + "content": "<|reserved_special_token_8|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128017": { + "content": "<|reserved_special_token_9|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128018": { + "content": "<|reserved_special_token_10|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128019": { + "content": "<|reserved_special_token_11|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128020": { + "content": "<|reserved_special_token_12|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128021": { + "content": "<|reserved_special_token_13|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128022": { + "content": "<|reserved_special_token_14|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128023": { + "content": "<|reserved_special_token_15|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128024": { + "content": "<|reserved_special_token_16|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128025": { + "content": "<|reserved_special_token_17|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128026": { + "content": "<|reserved_special_token_18|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128027": { + "content": "<|reserved_special_token_19|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128028": { + "content": "<|reserved_special_token_20|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128029": { + "content": "<|reserved_special_token_21|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128030": { + "content": "<|reserved_special_token_22|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128031": { + "content": "<|reserved_special_token_23|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128032": { + "content": "<|reserved_special_token_24|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128033": { + "content": "<|reserved_special_token_25|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128034": { + "content": "<|reserved_special_token_26|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128035": { + "content": "<|reserved_special_token_27|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128036": { + "content": "<|reserved_special_token_28|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128037": { + "content": "<|reserved_special_token_29|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128038": { + "content": "<|reserved_special_token_30|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128039": { + "content": "<|reserved_special_token_31|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128040": { + "content": "<|reserved_special_token_32|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128041": { + "content": "<|reserved_special_token_33|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128042": { + "content": "<|reserved_special_token_34|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128043": { + "content": "<|reserved_special_token_35|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128044": { + "content": "<|reserved_special_token_36|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128045": { + "content": "<|reserved_special_token_37|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128046": { + "content": "<|reserved_special_token_38|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128047": { + "content": "<|reserved_special_token_39|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128048": { + "content": "<|reserved_special_token_40|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128049": { + "content": "<|reserved_special_token_41|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128050": { + "content": "<|reserved_special_token_42|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128051": { + "content": "<|reserved_special_token_43|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128052": { + "content": "<|reserved_special_token_44|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128053": { + "content": "<|reserved_special_token_45|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128054": { + "content": "<|reserved_special_token_46|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128055": { + "content": "<|reserved_special_token_47|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128056": { + "content": "<|reserved_special_token_48|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128057": { + "content": "<|reserved_special_token_49|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128058": { + "content": "<|reserved_special_token_50|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128059": { + "content": "<|reserved_special_token_51|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128060": { + "content": "<|reserved_special_token_52|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128061": { + "content": "<|reserved_special_token_53|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128062": { + "content": "<|reserved_special_token_54|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128063": { + "content": "<|reserved_special_token_55|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128064": { + "content": "<|reserved_special_token_56|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128065": { + "content": "<|reserved_special_token_57|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128066": { + "content": "<|reserved_special_token_58|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128067": { + "content": "<|reserved_special_token_59|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128068": { + "content": "<|reserved_special_token_60|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128069": { + "content": "<|reserved_special_token_61|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128070": { + "content": "<|reserved_special_token_62|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128071": { + "content": "<|reserved_special_token_63|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128072": { + "content": "<|reserved_special_token_64|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128073": { + "content": "<|reserved_special_token_65|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128074": { + "content": "<|reserved_special_token_66|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128075": { + "content": "<|reserved_special_token_67|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128076": { + "content": "<|reserved_special_token_68|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128077": { + "content": "<|reserved_special_token_69|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128078": { + "content": "<|reserved_special_token_70|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128079": { + "content": "<|reserved_special_token_71|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128080": { + "content": "<|reserved_special_token_72|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128081": { + "content": "<|reserved_special_token_73|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128082": { + "content": "<|reserved_special_token_74|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128083": { + "content": "<|reserved_special_token_75|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128084": { + "content": "<|reserved_special_token_76|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128085": { + "content": "<|reserved_special_token_77|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128086": { + "content": "<|reserved_special_token_78|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128087": { + "content": "<|reserved_special_token_79|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128088": { + "content": "<|reserved_special_token_80|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128089": { + "content": "<|reserved_special_token_81|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128090": { + "content": "<|reserved_special_token_82|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128091": { + "content": "<|reserved_special_token_83|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128092": { + "content": "<|reserved_special_token_84|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128093": { + "content": "<|reserved_special_token_85|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128094": { + "content": "<|reserved_special_token_86|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128095": { + "content": "<|reserved_special_token_87|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128096": { + "content": "<|reserved_special_token_88|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128097": { + "content": "<|reserved_special_token_89|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128098": { + "content": "<|reserved_special_token_90|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128099": { + "content": "<|reserved_special_token_91|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128100": { + "content": "<|reserved_special_token_92|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128101": { + "content": "<|reserved_special_token_93|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128102": { + "content": "<|reserved_special_token_94|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128103": { + "content": "<|reserved_special_token_95|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128104": { + "content": "<|reserved_special_token_96|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128105": { + "content": "<|reserved_special_token_97|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128106": { + "content": "<|reserved_special_token_98|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128107": { + "content": "<|reserved_special_token_99|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128108": { + "content": "<|reserved_special_token_100|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128109": { + "content": "<|reserved_special_token_101|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128110": { + "content": "<|reserved_special_token_102|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128111": { + "content": "<|reserved_special_token_103|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128112": { + "content": "<|reserved_special_token_104|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128113": { + "content": "<|reserved_special_token_105|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128114": { + "content": "<|reserved_special_token_106|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128115": { + "content": "<|reserved_special_token_107|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128116": { + "content": "<|reserved_special_token_108|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128117": { + "content": "<|reserved_special_token_109|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128118": { + "content": "<|reserved_special_token_110|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128119": { + "content": "<|reserved_special_token_111|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128120": { + "content": "<|reserved_special_token_112|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128121": { + "content": "<|reserved_special_token_113|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128122": { + "content": "<|reserved_special_token_114|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128123": { + "content": "<|reserved_special_token_115|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128124": { + "content": "<|reserved_special_token_116|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128125": { + "content": "<|reserved_special_token_117|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128126": { + "content": "<|reserved_special_token_118|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128127": { + "content": "<|reserved_special_token_119|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128128": { + "content": "<|reserved_special_token_120|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128129": { + "content": "<|reserved_special_token_121|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128130": { + "content": "<|reserved_special_token_122|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128131": { + "content": "<|reserved_special_token_123|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128132": { + "content": "<|reserved_special_token_124|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128133": { + "content": "<|reserved_special_token_125|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128134": { + "content": "<|reserved_special_token_126|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128135": { + "content": "<|reserved_special_token_127|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128136": { + "content": "<|reserved_special_token_128|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128137": { + "content": "<|reserved_special_token_129|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128138": { + "content": "<|reserved_special_token_130|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128139": { + "content": "<|reserved_special_token_131|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128140": { + "content": "<|reserved_special_token_132|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128141": { + "content": "<|reserved_special_token_133|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128142": { + "content": "<|reserved_special_token_134|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128143": { + "content": "<|reserved_special_token_135|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128144": { + "content": "<|reserved_special_token_136|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128145": { + "content": "<|reserved_special_token_137|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128146": { + "content": "<|reserved_special_token_138|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128147": { + "content": "<|reserved_special_token_139|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128148": { + "content": "<|reserved_special_token_140|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128149": { + "content": "<|reserved_special_token_141|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128150": { + "content": "<|reserved_special_token_142|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128151": { + "content": "<|reserved_special_token_143|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128152": { + "content": "<|reserved_special_token_144|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128153": { + "content": "<|reserved_special_token_145|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128154": { + "content": "<|reserved_special_token_146|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128155": { + "content": "<|reserved_special_token_147|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128156": { + "content": "<|reserved_special_token_148|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128157": { + "content": "<|reserved_special_token_149|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128158": { + "content": "<|reserved_special_token_150|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128159": { + "content": "<|reserved_special_token_151|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128160": { + "content": "<|reserved_special_token_152|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128161": { + "content": "<|reserved_special_token_153|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128162": { + "content": "<|reserved_special_token_154|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128163": { + "content": "<|reserved_special_token_155|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128164": { + "content": "<|reserved_special_token_156|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128165": { + "content": "<|reserved_special_token_157|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128166": { + "content": "<|reserved_special_token_158|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128167": { + "content": "<|reserved_special_token_159|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128168": { + "content": "<|reserved_special_token_160|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128169": { + "content": "<|reserved_special_token_161|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128170": { + "content": "<|reserved_special_token_162|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128171": { + "content": "<|reserved_special_token_163|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128172": { + "content": "<|reserved_special_token_164|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128173": { + "content": "<|reserved_special_token_165|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128174": { + "content": "<|reserved_special_token_166|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128175": { + "content": "<|reserved_special_token_167|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128176": { + "content": "<|reserved_special_token_168|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128177": { + "content": "<|reserved_special_token_169|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128178": { + "content": "<|reserved_special_token_170|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128179": { + "content": "<|reserved_special_token_171|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128180": { + "content": "<|reserved_special_token_172|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128181": { + "content": "<|reserved_special_token_173|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128182": { + "content": "<|reserved_special_token_174|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128183": { + "content": "<|reserved_special_token_175|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128184": { + "content": "<|reserved_special_token_176|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128185": { + "content": "<|reserved_special_token_177|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128186": { + "content": "<|reserved_special_token_178|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128187": { + "content": "<|reserved_special_token_179|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128188": { + "content": "<|reserved_special_token_180|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128189": { + "content": "<|reserved_special_token_181|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128190": { + "content": "<|reserved_special_token_182|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128191": { + "content": "<|reserved_special_token_183|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128192": { + "content": "<|reserved_special_token_184|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128193": { + "content": "<|reserved_special_token_185|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128194": { + "content": "<|reserved_special_token_186|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128195": { + "content": "<|reserved_special_token_187|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128196": { + "content": "<|reserved_special_token_188|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128197": { + "content": "<|reserved_special_token_189|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128198": { + "content": "<|reserved_special_token_190|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128199": { + "content": "<|reserved_special_token_191|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128200": { + "content": "<|reserved_special_token_192|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128201": { + "content": "<|reserved_special_token_193|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128202": { + "content": "<|reserved_special_token_194|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128203": { + "content": "<|reserved_special_token_195|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128204": { + "content": "<|reserved_special_token_196|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128205": { + "content": "<|reserved_special_token_197|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128206": { + "content": "<|reserved_special_token_198|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128207": { + "content": "<|reserved_special_token_199|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128208": { + "content": "<|reserved_special_token_200|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128209": { + "content": "<|reserved_special_token_201|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128210": { + "content": "<|reserved_special_token_202|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128211": { + "content": "<|reserved_special_token_203|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128212": { + "content": "<|reserved_special_token_204|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128213": { + "content": "<|reserved_special_token_205|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128214": { + "content": "<|reserved_special_token_206|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128215": { + "content": "<|reserved_special_token_207|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128216": { + "content": "<|reserved_special_token_208|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128217": { + "content": "<|reserved_special_token_209|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128218": { + "content": "<|reserved_special_token_210|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128219": { + "content": "<|reserved_special_token_211|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128220": { + "content": "<|reserved_special_token_212|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128221": { + "content": "<|reserved_special_token_213|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128222": { + "content": "<|reserved_special_token_214|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128223": { + "content": "<|reserved_special_token_215|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128224": { + "content": "<|reserved_special_token_216|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128225": { + "content": "<|reserved_special_token_217|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128226": { + "content": "<|reserved_special_token_218|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128227": { + "content": "<|reserved_special_token_219|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128228": { + "content": "<|reserved_special_token_220|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128229": { + "content": "<|reserved_special_token_221|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128230": { + "content": "<|reserved_special_token_222|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128231": { + "content": "<|reserved_special_token_223|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128232": { + "content": "<|reserved_special_token_224|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128233": { + "content": "<|reserved_special_token_225|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128234": { + "content": "<|reserved_special_token_226|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128235": { + "content": "<|reserved_special_token_227|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128236": { + "content": "<|reserved_special_token_228|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128237": { + "content": "<|reserved_special_token_229|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128238": { + "content": "<|reserved_special_token_230|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128239": { + "content": "<|reserved_special_token_231|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128240": { + "content": "<|reserved_special_token_232|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128241": { + "content": "<|reserved_special_token_233|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128242": { + "content": "<|reserved_special_token_234|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128243": { + "content": "<|reserved_special_token_235|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128244": { + "content": "<|reserved_special_token_236|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128245": { + "content": "<|reserved_special_token_237|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128246": { + "content": "<|reserved_special_token_238|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128247": { + "content": "<|reserved_special_token_239|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128248": { + "content": "<|reserved_special_token_240|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128249": { + "content": "<|reserved_special_token_241|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128250": { + "content": "<|reserved_special_token_242|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128251": { + "content": "<|reserved_special_token_243|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128252": { + "content": "<|reserved_special_token_244|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128253": { + "content": "<|reserved_special_token_245|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128254": { + "content": "<|reserved_special_token_246|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128255": { + "content": "<|reserved_special_token_247|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + } + }, + "bos_token": "<|begin_of_text|>", + "chat_template": "{{- bos_token }}\n{%- if custom_tools is defined %}\n {%- set tools = custom_tools %}\n{%- endif %}\n{%- if not tools_in_user_message is defined %}\n {%- set tools_in_user_message = true %}\n{%- endif %}\n{%- if not date_string is defined %}\n {%- if strftime_now is defined %}\n {%- set date_string = strftime_now(\"%d %b %Y\") %}\n {%- else %}\n {%- set date_string = \"26 Jul 2024\" %}\n {%- endif %}\n{%- endif %}\n{%- if not tools is defined %}\n {%- set tools = none %}\n{%- endif %}\n\n{#- This block extracts the system message, so we can slot it into the right place. #}\n{%- if messages[0]['role'] == 'system' %}\n {%- set system_message = messages[0]['content']|trim %}\n {%- set messages = messages[1:] %}\n{%- else %}\n {%- set system_message = \"\" %}\n{%- endif %}\n\n{#- System message #}\n{{- \"<|start_header_id|>system<|end_header_id|>\\n\\n\" }}\n{%- if tools is not none %}\n {{- \"Environment: ipython\\n\" }}\n{%- endif %}\n{{- \"Cutting Knowledge Date: December 2023\\n\" }}\n{{- \"Today Date: \" + date_string + \"\\n\\n\" }}\n{%- if tools is not none and not tools_in_user_message %}\n {{- \"You have access to the following functions. To call a function, please respond with JSON for a function call.\" }}\n {{- 'Respond in the format {\"name\": function name, \"parameters\": dictionary of argument name and its value}.' }}\n {{- \"Do not use variables.\\n\\n\" }}\n {%- for t in tools %}\n {{- t | tojson(indent=4) }}\n {{- \"\\n\\n\" }}\n {%- endfor %}\n{%- endif %}\n{{- system_message }}\n{{- \"<|eot_id|>\" }}\n\n{#- Custom tools are passed in a user message with some extra guidance #}\n{%- if tools_in_user_message and not tools is none %}\n {#- Extract the first user message so we can plug it in here #}\n {%- if messages | length != 0 %}\n {%- set first_user_message = messages[0]['content']|trim %}\n {%- set messages = messages[1:] %}\n {%- else %}\n {{- raise_exception(\"Cannot put tools in the first user message when there's no first user message!\") }}\n{%- endif %}\n {{- '<|start_header_id|>user<|end_header_id|>\\n\\n' -}}\n {{- \"Given the following functions, please respond with a JSON for a function call \" }}\n {{- \"with its proper arguments that best answers the given prompt.\\n\\n\" }}\n {{- 'Respond in the format {\"name\": function name, \"parameters\": dictionary of argument name and its value}.' }}\n {{- \"Do not use variables.\\n\\n\" }}\n {%- for t in tools %}\n {{- t | tojson(indent=4) }}\n {{- \"\\n\\n\" }}\n {%- endfor %}\n {{- first_user_message + \"<|eot_id|>\"}}\n{%- endif %}\n\n{%- for message in messages %}\n {%- if not (message.role == 'ipython' or message.role == 'tool' or 'tool_calls' in message) %}\n {{- '<|start_header_id|>' + message['role'] + '<|end_header_id|>\\n\\n'+ message['content'] | trim + '<|eot_id|>' }}\n {%- elif 'tool_calls' in message %}\n {%- if not message.tool_calls|length == 1 %}\n {{- raise_exception(\"This model only supports single tool-calls at once!\") }}\n {%- endif %}\n {%- set tool_call = message.tool_calls[0].function %}\n {{- '<|start_header_id|>assistant<|end_header_id|>\\n\\n' -}}\n {{- '{\"name\": \"' + tool_call.name + '\", ' }}\n {{- '\"parameters\": ' }}\n {{- tool_call.arguments | tojson }}\n {{- \"}\" }}\n {{- \"<|eot_id|>\" }}\n {%- elif message.role == \"tool\" or message.role == \"ipython\" %}\n {{- \"<|start_header_id|>ipython<|end_header_id|>\\n\\n\" }}\n {%- if message.content is mapping or message.content is iterable %}\n {{- message.content | tojson }}\n {%- else %}\n {{- message.content }}\n {%- endif %}\n {{- \"<|eot_id|>\" }}\n {%- endif %}\n{%- endfor %}\n{%- if add_generation_prompt %}\n {{- '<|start_header_id|>assistant<|end_header_id|>\\n\\n' }}\n{%- endif %}\n", + "clean_up_tokenization_spaces": true, + "eos_token": "<|eot_id|>", + "model_input_names": [ + "input_ids", + "attention_mask" + ], + "model_max_length": 2048, + "pad_token": "<|eot_id|>", + "tokenizer_class": "PreTrainedTokenizerFast" +} diff --git a/checkpoint-384/trainer_state.json b/checkpoint-384/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..6715d47a8830130d2d99a6273b4951be3d891048 --- /dev/null +++ b/checkpoint-384/trainer_state.json @@ -0,0 +1,5793 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 0.8118929633300297, + "eval_steps": 500, + "global_step": 384, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.002114304592005286, + "grad_norm": 1.5018059015274048, + "learning_rate": 2.083333333333333e-08, + "logits/chosen": -0.3466828167438507, + "logits/rejected": -0.30099987983703613, + "logps/chosen": -0.9345186948776245, + "logps/rejected": -0.9117153882980347, + "loss": 1.4889, + "rewards/accuracies": 0.53125, + "rewards/chosen": -1.869037389755249, + "rewards/margins": -0.04560665041208267, + "rewards/rejected": -1.8234307765960693, + "step": 1 + }, + { + "epoch": 0.004228609184010572, + "grad_norm": 0.8093975186347961, + "learning_rate": 4.166666666666666e-08, + "logits/chosen": -0.4310421049594879, + "logits/rejected": -0.39132067561149597, + "logps/chosen": -0.8198825716972351, + "logps/rejected": -0.8644211888313293, + "loss": 1.376, + "rewards/accuracies": 0.515625, + "rewards/chosen": -1.6397651433944702, + "rewards/margins": 0.08907715976238251, + "rewards/rejected": -1.7288423776626587, + "step": 2 + }, + { + "epoch": 0.006342913776015857, + "grad_norm": 0.5377389788627625, + "learning_rate": 6.25e-08, + "logits/chosen": -0.46692028641700745, + "logits/rejected": -0.4649256467819214, + "logps/chosen": -0.9087910652160645, + "logps/rejected": -0.9648240804672241, + "loss": 1.3404, + "rewards/accuracies": 0.5703125, + "rewards/chosen": -1.817582130432129, + "rewards/margins": 0.11206617206335068, + "rewards/rejected": -1.9296481609344482, + "step": 3 + }, + { + "epoch": 0.008457218368021144, + "grad_norm": 0.3221875727176666, + "learning_rate": 8.333333333333333e-08, + "logits/chosen": -0.416828453540802, + "logits/rejected": -0.3584724962711334, + "logps/chosen": -0.7818898558616638, + "logps/rejected": -0.8170815110206604, + "loss": 1.3806, + "rewards/accuracies": 0.484375, + "rewards/chosen": -1.5637797117233276, + "rewards/margins": 0.07038339227437973, + "rewards/rejected": -1.6341630220413208, + "step": 4 + }, + { + "epoch": 0.010571522960026428, + "grad_norm": 0.64655601978302, + "learning_rate": 1.0416666666666667e-07, + "logits/chosen": -0.376886248588562, + "logits/rejected": -0.3516141474246979, + "logps/chosen": -0.8814125061035156, + "logps/rejected": -1.0214396715164185, + "loss": 1.2741, + "rewards/accuracies": 0.625, + "rewards/chosen": -1.7628250122070312, + "rewards/margins": 0.28005433082580566, + "rewards/rejected": -2.042879343032837, + "step": 5 + }, + { + "epoch": 0.012685827552031714, + "grad_norm": 0.4775894582271576, + "learning_rate": 1.25e-07, + "logits/chosen": -0.4757865369319916, + "logits/rejected": -0.4498941898345947, + "logps/chosen": -0.8962199687957764, + "logps/rejected": -0.9462199807167053, + "loss": 1.364, + "rewards/accuracies": 0.5625, + "rewards/chosen": -1.7924399375915527, + "rewards/margins": 0.10000008344650269, + "rewards/rejected": -1.8924399614334106, + "step": 6 + }, + { + "epoch": 0.014800132144037, + "grad_norm": 1.2459568977355957, + "learning_rate": 1.4583333333333335e-07, + "logits/chosen": -0.38895344734191895, + "logits/rejected": -0.38165366649627686, + "logps/chosen": -0.9025766253471375, + "logps/rejected": -0.9465017318725586, + "loss": 1.3898, + "rewards/accuracies": 0.59375, + "rewards/chosen": -1.805153250694275, + "rewards/margins": 0.0878501906991005, + "rewards/rejected": -1.8930034637451172, + "step": 7 + }, + { + "epoch": 0.016914436736042288, + "grad_norm": 0.6195729374885559, + "learning_rate": 1.6666666666666665e-07, + "logits/chosen": -0.3964853286743164, + "logits/rejected": -0.377862811088562, + "logps/chosen": -0.9054160118103027, + "logps/rejected": -0.9605879187583923, + "loss": 1.3821, + "rewards/accuracies": 0.4765625, + "rewards/chosen": -1.8108320236206055, + "rewards/margins": 0.1103438138961792, + "rewards/rejected": -1.9211758375167847, + "step": 8 + }, + { + "epoch": 0.019028741328047574, + "grad_norm": 1.2074137926101685, + "learning_rate": 1.875e-07, + "logits/chosen": -0.3729037344455719, + "logits/rejected": -0.38143450021743774, + "logps/chosen": -0.9328653216362, + "logps/rejected": -0.9905799627304077, + "loss": 1.3754, + "rewards/accuracies": 0.5078125, + "rewards/chosen": -1.8657306432724, + "rewards/margins": 0.11542946100234985, + "rewards/rejected": -1.9811599254608154, + "step": 9 + }, + { + "epoch": 0.021143045920052856, + "grad_norm": 0.2867220640182495, + "learning_rate": 2.0833333333333333e-07, + "logits/chosen": -0.4263336658477783, + "logits/rejected": -0.42903271317481995, + "logps/chosen": -0.8979260325431824, + "logps/rejected": -0.9078099727630615, + "loss": 1.4438, + "rewards/accuracies": 0.515625, + "rewards/chosen": -1.7958520650863647, + "rewards/margins": 0.019767940044403076, + "rewards/rejected": -1.815619945526123, + "step": 10 + }, + { + "epoch": 0.023257350512058142, + "grad_norm": 0.8363026976585388, + "learning_rate": 2.2916666666666663e-07, + "logits/chosen": -0.3374914526939392, + "logits/rejected": -0.32399696111679077, + "logps/chosen": -0.8886098861694336, + "logps/rejected": -0.9484556317329407, + "loss": 1.3422, + "rewards/accuracies": 0.546875, + "rewards/chosen": -1.7772197723388672, + "rewards/margins": 0.11969132721424103, + "rewards/rejected": -1.8969112634658813, + "step": 11 + }, + { + "epoch": 0.025371655104063428, + "grad_norm": 0.5406804084777832, + "learning_rate": 2.5e-07, + "logits/chosen": -0.42844679951667786, + "logits/rejected": -0.37984615564346313, + "logps/chosen": -0.861629843711853, + "logps/rejected": -0.8968492150306702, + "loss": 1.3922, + "rewards/accuracies": 0.5, + "rewards/chosen": -1.723259687423706, + "rewards/margins": 0.07043875753879547, + "rewards/rejected": -1.7936984300613403, + "step": 12 + }, + { + "epoch": 0.027485959696068714, + "grad_norm": 0.9919329285621643, + "learning_rate": 2.708333333333333e-07, + "logits/chosen": -0.36495402455329895, + "logits/rejected": -0.3249490261077881, + "logps/chosen": -0.8502095937728882, + "logps/rejected": -0.8470643758773804, + "loss": 1.4334, + "rewards/accuracies": 0.5078125, + "rewards/chosen": -1.7004191875457764, + "rewards/margins": -0.006290358491241932, + "rewards/rejected": -1.6941287517547607, + "step": 13 + }, + { + "epoch": 0.029600264288074, + "grad_norm": 0.5477162003517151, + "learning_rate": 2.916666666666667e-07, + "logits/chosen": -0.4155704081058502, + "logits/rejected": -0.39535820484161377, + "logps/chosen": -1.0430240631103516, + "logps/rejected": -1.1318373680114746, + "loss": 1.3533, + "rewards/accuracies": 0.6015625, + "rewards/chosen": -2.086048126220703, + "rewards/margins": 0.17762640118598938, + "rewards/rejected": -2.263674736022949, + "step": 14 + }, + { + "epoch": 0.031714568880079286, + "grad_norm": 0.26530712842941284, + "learning_rate": 3.1249999999999997e-07, + "logits/chosen": -0.4810572564601898, + "logits/rejected": -0.42454615235328674, + "logps/chosen": -0.8741041421890259, + "logps/rejected": -0.9494178295135498, + "loss": 1.3655, + "rewards/accuracies": 0.5, + "rewards/chosen": -1.7482082843780518, + "rewards/margins": 0.15062758326530457, + "rewards/rejected": -1.8988356590270996, + "step": 15 + }, + { + "epoch": 0.033828873472084575, + "grad_norm": 0.9272629618644714, + "learning_rate": 3.333333333333333e-07, + "logits/chosen": -0.4440098702907562, + "logits/rejected": -0.3930297791957855, + "logps/chosen": -0.8473359942436218, + "logps/rejected": -0.9369213581085205, + "loss": 1.3248, + "rewards/accuracies": 0.5625, + "rewards/chosen": -1.6946719884872437, + "rewards/margins": 0.17917080223560333, + "rewards/rejected": -1.873842716217041, + "step": 16 + }, + { + "epoch": 0.03594317806408986, + "grad_norm": 0.5912418961524963, + "learning_rate": 3.541666666666667e-07, + "logits/chosen": -0.3838099539279938, + "logits/rejected": -0.3507584035396576, + "logps/chosen": -0.8888350129127502, + "logps/rejected": -0.9361770749092102, + "loss": 1.383, + "rewards/accuracies": 0.4921875, + "rewards/chosen": -1.7776700258255005, + "rewards/margins": 0.0946839451789856, + "rewards/rejected": -1.8723541498184204, + "step": 17 + }, + { + "epoch": 0.03805748265609515, + "grad_norm": 0.6536504030227661, + "learning_rate": 3.75e-07, + "logits/chosen": -0.3581697940826416, + "logits/rejected": -0.3620460629463196, + "logps/chosen": -0.8519617319107056, + "logps/rejected": -0.9022184610366821, + "loss": 1.3841, + "rewards/accuracies": 0.5078125, + "rewards/chosen": -1.7039234638214111, + "rewards/margins": 0.10051343590021133, + "rewards/rejected": -1.8044369220733643, + "step": 18 + }, + { + "epoch": 0.04017178724810043, + "grad_norm": 0.3433632552623749, + "learning_rate": 3.958333333333333e-07, + "logits/chosen": -0.37887442111968994, + "logits/rejected": -0.37543320655822754, + "logps/chosen": -0.9464104175567627, + "logps/rejected": -1.0017329454421997, + "loss": 1.3649, + "rewards/accuracies": 0.5703125, + "rewards/chosen": -1.8928208351135254, + "rewards/margins": 0.11064518243074417, + "rewards/rejected": -2.0034658908843994, + "step": 19 + }, + { + "epoch": 0.04228609184010571, + "grad_norm": 0.9764007329940796, + "learning_rate": 4.1666666666666667e-07, + "logits/chosen": -0.44110679626464844, + "logits/rejected": -0.4280649721622467, + "logps/chosen": -0.9046768546104431, + "logps/rejected": -1.0464633703231812, + "loss": 1.2592, + "rewards/accuracies": 0.625, + "rewards/chosen": -1.8093537092208862, + "rewards/margins": 0.2835729420185089, + "rewards/rejected": -2.0929267406463623, + "step": 20 + }, + { + "epoch": 0.044400396432111, + "grad_norm": 1.8563830852508545, + "learning_rate": 4.375e-07, + "logits/chosen": -0.45183491706848145, + "logits/rejected": -0.42935287952423096, + "logps/chosen": -0.9043138027191162, + "logps/rejected": -0.9462392926216125, + "loss": 1.3784, + "rewards/accuracies": 0.5625, + "rewards/chosen": -1.8086276054382324, + "rewards/margins": 0.08385094255208969, + "rewards/rejected": -1.892478585243225, + "step": 21 + }, + { + "epoch": 0.046514701024116284, + "grad_norm": 1.3473299741744995, + "learning_rate": 4.5833333333333327e-07, + "logits/chosen": -0.37855517864227295, + "logits/rejected": -0.34429043531417847, + "logps/chosen": -0.9284683465957642, + "logps/rejected": -0.9454050064086914, + "loss": 1.4346, + "rewards/accuracies": 0.5234375, + "rewards/chosen": -1.8569366931915283, + "rewards/margins": 0.03387312963604927, + "rewards/rejected": -1.8908100128173828, + "step": 22 + }, + { + "epoch": 0.04862900561612157, + "grad_norm": 0.940831184387207, + "learning_rate": 4.791666666666667e-07, + "logits/chosen": -0.39172160625457764, + "logits/rejected": -0.3695780634880066, + "logps/chosen": -0.9314202666282654, + "logps/rejected": -1.020229697227478, + "loss": 1.3322, + "rewards/accuracies": 0.5703125, + "rewards/chosen": -1.8628405332565308, + "rewards/margins": 0.17761869728565216, + "rewards/rejected": -2.040459394454956, + "step": 23 + }, + { + "epoch": 0.050743310208126856, + "grad_norm": 0.5783158540725708, + "learning_rate": 5e-07, + "logits/chosen": -0.4958629608154297, + "logits/rejected": -0.4257377088069916, + "logps/chosen": -0.9379237294197083, + "logps/rejected": -0.9415461421012878, + "loss": 1.441, + "rewards/accuracies": 0.515625, + "rewards/chosen": -1.8758474588394165, + "rewards/margins": 0.0072449808940291405, + "rewards/rejected": -1.8830922842025757, + "step": 24 + }, + { + "epoch": 0.052857614800132145, + "grad_norm": 1.4209853410720825, + "learning_rate": 5.208333333333334e-07, + "logits/chosen": -0.36407172679901123, + "logits/rejected": -0.3331725299358368, + "logps/chosen": -0.9192589521408081, + "logps/rejected": -0.9595308899879456, + "loss": 1.3994, + "rewards/accuracies": 0.5078125, + "rewards/chosen": -1.8385179042816162, + "rewards/margins": 0.080544114112854, + "rewards/rejected": -1.9190617799758911, + "step": 25 + }, + { + "epoch": 0.05497191939213743, + "grad_norm": 0.6310216188430786, + "learning_rate": 5.416666666666666e-07, + "logits/chosen": -0.41772690415382385, + "logits/rejected": -0.36565953493118286, + "logps/chosen": -0.8052878379821777, + "logps/rejected": -0.8673746585845947, + "loss": 1.3356, + "rewards/accuracies": 0.5859375, + "rewards/chosen": -1.6105756759643555, + "rewards/margins": 0.12417369335889816, + "rewards/rejected": -1.7347493171691895, + "step": 26 + }, + { + "epoch": 0.05708622398414272, + "grad_norm": 1.2933462858200073, + "learning_rate": 5.625e-07, + "logits/chosen": -0.4482795000076294, + "logits/rejected": -0.39409321546554565, + "logps/chosen": -0.8339261412620544, + "logps/rejected": -0.8675202131271362, + "loss": 1.3739, + "rewards/accuracies": 0.4921875, + "rewards/chosen": -1.6678522825241089, + "rewards/margins": 0.06718815863132477, + "rewards/rejected": -1.7350404262542725, + "step": 27 + }, + { + "epoch": 0.059200528576148, + "grad_norm": 0.5808025002479553, + "learning_rate": 5.833333333333334e-07, + "logits/chosen": -0.37116044759750366, + "logits/rejected": -0.3478051722049713, + "logps/chosen": -0.8950318694114685, + "logps/rejected": -0.9756672978401184, + "loss": 1.3505, + "rewards/accuracies": 0.5625, + "rewards/chosen": -1.790063738822937, + "rewards/margins": 0.1612708568572998, + "rewards/rejected": -1.9513345956802368, + "step": 28 + }, + { + "epoch": 0.06131483316815329, + "grad_norm": 1.0569533109664917, + "learning_rate": 6.041666666666666e-07, + "logits/chosen": -0.421148419380188, + "logits/rejected": -0.38443076610565186, + "logps/chosen": -0.8021283745765686, + "logps/rejected": -0.8370179533958435, + "loss": 1.3916, + "rewards/accuracies": 0.46875, + "rewards/chosen": -1.6042567491531372, + "rewards/margins": 0.06977920234203339, + "rewards/rejected": -1.674035906791687, + "step": 29 + }, + { + "epoch": 0.06342913776015857, + "grad_norm": 0.42577147483825684, + "learning_rate": 6.249999999999999e-07, + "logits/chosen": -0.4429818391799927, + "logits/rejected": -0.3524704575538635, + "logps/chosen": -0.8916822671890259, + "logps/rejected": -0.8985542058944702, + "loss": 1.4321, + "rewards/accuracies": 0.5234375, + "rewards/chosen": -1.7833645343780518, + "rewards/margins": 0.01374388113617897, + "rewards/rejected": -1.7971084117889404, + "step": 30 + }, + { + "epoch": 0.06554344235216386, + "grad_norm": 1.0056904554367065, + "learning_rate": 6.458333333333333e-07, + "logits/chosen": -0.376451700925827, + "logits/rejected": -0.342519074678421, + "logps/chosen": -0.9038617014884949, + "logps/rejected": -0.953092634677887, + "loss": 1.398, + "rewards/accuracies": 0.4765625, + "rewards/chosen": -1.8077234029769897, + "rewards/margins": 0.09846188127994537, + "rewards/rejected": -1.906185269355774, + "step": 31 + }, + { + "epoch": 0.06765774694416915, + "grad_norm": 0.5494012236595154, + "learning_rate": 6.666666666666666e-07, + "logits/chosen": -0.3459138870239258, + "logits/rejected": -0.3590989410877228, + "logps/chosen": -0.8274999260902405, + "logps/rejected": -0.8776509761810303, + "loss": 1.363, + "rewards/accuracies": 0.5625, + "rewards/chosen": -1.654999852180481, + "rewards/margins": 0.1003020703792572, + "rewards/rejected": -1.7553019523620605, + "step": 32 + }, + { + "epoch": 0.06977205153617443, + "grad_norm": 0.693267822265625, + "learning_rate": 6.875e-07, + "logits/chosen": -0.40053680539131165, + "logits/rejected": -0.37323904037475586, + "logps/chosen": -0.8255244493484497, + "logps/rejected": -0.8658804893493652, + "loss": 1.3712, + "rewards/accuracies": 0.578125, + "rewards/chosen": -1.6510488986968994, + "rewards/margins": 0.08071210980415344, + "rewards/rejected": -1.7317609786987305, + "step": 33 + }, + { + "epoch": 0.07188635612817972, + "grad_norm": 2.213238000869751, + "learning_rate": 7.083333333333334e-07, + "logits/chosen": -0.40097948908805847, + "logits/rejected": -0.38190510869026184, + "logps/chosen": -0.9122671484947205, + "logps/rejected": -0.9549552798271179, + "loss": 1.36, + "rewards/accuracies": 0.5390625, + "rewards/chosen": -1.824534296989441, + "rewards/margins": 0.0853763073682785, + "rewards/rejected": -1.9099105596542358, + "step": 34 + }, + { + "epoch": 0.074000660720185, + "grad_norm": 0.6859830021858215, + "learning_rate": 7.291666666666666e-07, + "logits/chosen": -0.42501094937324524, + "logits/rejected": -0.42549416422843933, + "logps/chosen": -1.0008373260498047, + "logps/rejected": -1.1157118082046509, + "loss": 1.3294, + "rewards/accuracies": 0.5546875, + "rewards/chosen": -2.0016746520996094, + "rewards/margins": 0.22974897921085358, + "rewards/rejected": -2.2314236164093018, + "step": 35 + }, + { + "epoch": 0.0761149653121903, + "grad_norm": 0.6468721628189087, + "learning_rate": 7.5e-07, + "logits/chosen": -0.36494994163513184, + "logits/rejected": -0.30433908104896545, + "logps/chosen": -0.9062094688415527, + "logps/rejected": -0.920263409614563, + "loss": 1.4312, + "rewards/accuracies": 0.4921875, + "rewards/chosen": -1.8124189376831055, + "rewards/margins": 0.02810765616595745, + "rewards/rejected": -1.840526819229126, + "step": 36 + }, + { + "epoch": 0.07822926990419557, + "grad_norm": 0.5085556507110596, + "learning_rate": 7.708333333333333e-07, + "logits/chosen": -0.4677881598472595, + "logits/rejected": -0.456132709980011, + "logps/chosen": -1.0101865530014038, + "logps/rejected": -1.0429682731628418, + "loss": 1.4132, + "rewards/accuracies": 0.5234375, + "rewards/chosen": -2.0203731060028076, + "rewards/margins": 0.06556359678506851, + "rewards/rejected": -2.0859365463256836, + "step": 37 + }, + { + "epoch": 0.08034357449620086, + "grad_norm": 0.23813335597515106, + "learning_rate": 7.916666666666666e-07, + "logits/chosen": -0.3991190791130066, + "logits/rejected": -0.3664044141769409, + "logps/chosen": -0.9578174352645874, + "logps/rejected": -0.9229263067245483, + "loss": 1.4824, + "rewards/accuracies": 0.4609375, + "rewards/chosen": -1.9156348705291748, + "rewards/margins": -0.06978224962949753, + "rewards/rejected": -1.8458526134490967, + "step": 38 + }, + { + "epoch": 0.08245787908820615, + "grad_norm": 0.587037980556488, + "learning_rate": 8.125e-07, + "logits/chosen": -0.37554049491882324, + "logits/rejected": -0.36305734515190125, + "logps/chosen": -0.8503091931343079, + "logps/rejected": -0.864615261554718, + "loss": 1.4086, + "rewards/accuracies": 0.4296875, + "rewards/chosen": -1.7006183862686157, + "rewards/margins": 0.028611989691853523, + "rewards/rejected": -1.729230523109436, + "step": 39 + }, + { + "epoch": 0.08457218368021142, + "grad_norm": 0.4172501862049103, + "learning_rate": 8.333333333333333e-07, + "logits/chosen": -0.4405443072319031, + "logits/rejected": -0.41723060607910156, + "logps/chosen": -0.8502858877182007, + "logps/rejected": -0.9114271402359009, + "loss": 1.3446, + "rewards/accuracies": 0.546875, + "rewards/chosen": -1.7005717754364014, + "rewards/margins": 0.12228240817785263, + "rewards/rejected": -1.8228542804718018, + "step": 40 + }, + { + "epoch": 0.08668648827221671, + "grad_norm": 0.9275372624397278, + "learning_rate": 8.541666666666666e-07, + "logits/chosen": -0.4200601577758789, + "logits/rejected": -0.3478623628616333, + "logps/chosen": -0.892408013343811, + "logps/rejected": -0.9276402592658997, + "loss": 1.3887, + "rewards/accuracies": 0.46875, + "rewards/chosen": -1.784816026687622, + "rewards/margins": 0.07046431303024292, + "rewards/rejected": -1.8552805185317993, + "step": 41 + }, + { + "epoch": 0.088800792864222, + "grad_norm": 0.7317383289337158, + "learning_rate": 8.75e-07, + "logits/chosen": -0.37675267457962036, + "logits/rejected": -0.33540332317352295, + "logps/chosen": -0.7866061925888062, + "logps/rejected": -0.824250340461731, + "loss": 1.3837, + "rewards/accuracies": 0.5078125, + "rewards/chosen": -1.5732123851776123, + "rewards/margins": 0.07528844475746155, + "rewards/rejected": -1.648500680923462, + "step": 42 + }, + { + "epoch": 0.09091509745622729, + "grad_norm": 0.9452736973762512, + "learning_rate": 8.958333333333334e-07, + "logits/chosen": -0.4662383198738098, + "logits/rejected": -0.4447881579399109, + "logps/chosen": -0.9490666389465332, + "logps/rejected": -1.0112388134002686, + "loss": 1.3412, + "rewards/accuracies": 0.5390625, + "rewards/chosen": -1.8981332778930664, + "rewards/margins": 0.12434446066617966, + "rewards/rejected": -2.022477626800537, + "step": 43 + }, + { + "epoch": 0.09302940204823257, + "grad_norm": 0.2848323881626129, + "learning_rate": 9.166666666666665e-07, + "logits/chosen": -0.41404005885124207, + "logits/rejected": -0.3944583535194397, + "logps/chosen": -0.8224930167198181, + "logps/rejected": -0.8416361808776855, + "loss": 1.4027, + "rewards/accuracies": 0.609375, + "rewards/chosen": -1.6449860334396362, + "rewards/margins": 0.038286346942186356, + "rewards/rejected": -1.683272361755371, + "step": 44 + }, + { + "epoch": 0.09514370664023786, + "grad_norm": 0.7165678143501282, + "learning_rate": 9.374999999999999e-07, + "logits/chosen": -0.40475326776504517, + "logits/rejected": -0.3559921383857727, + "logps/chosen": -0.8070214986801147, + "logps/rejected": -0.8993593454360962, + "loss": 1.3148, + "rewards/accuracies": 0.53125, + "rewards/chosen": -1.6140429973602295, + "rewards/margins": 0.18467575311660767, + "rewards/rejected": -1.7987186908721924, + "step": 45 + }, + { + "epoch": 0.09725801123224315, + "grad_norm": 0.4779021739959717, + "learning_rate": 9.583333333333334e-07, + "logits/chosen": -0.4171525835990906, + "logits/rejected": -0.42166149616241455, + "logps/chosen": -0.7872560024261475, + "logps/rejected": -0.8496187925338745, + "loss": 1.3356, + "rewards/accuracies": 0.5546875, + "rewards/chosen": -1.574512004852295, + "rewards/margins": 0.12472567707300186, + "rewards/rejected": -1.699237585067749, + "step": 46 + }, + { + "epoch": 0.09937231582424844, + "grad_norm": 0.7870219349861145, + "learning_rate": 9.791666666666667e-07, + "logits/chosen": -0.3734116554260254, + "logits/rejected": -0.32778748869895935, + "logps/chosen": -0.7842286825180054, + "logps/rejected": -0.8161548972129822, + "loss": 1.3647, + "rewards/accuracies": 0.5078125, + "rewards/chosen": -1.5684573650360107, + "rewards/margins": 0.06385258585214615, + "rewards/rejected": -1.6323097944259644, + "step": 47 + }, + { + "epoch": 0.10148662041625371, + "grad_norm": 0.2597256600856781, + "learning_rate": 1e-06, + "logits/chosen": -0.4355677664279938, + "logits/rejected": -0.38983187079429626, + "logps/chosen": -0.8787693977355957, + "logps/rejected": -0.9383041262626648, + "loss": 1.35, + "rewards/accuracies": 0.5546875, + "rewards/chosen": -1.7575387954711914, + "rewards/margins": 0.11906948685646057, + "rewards/rejected": -1.8766082525253296, + "step": 48 + }, + { + "epoch": 0.103600925008259, + "grad_norm": 0.9942799210548401, + "learning_rate": 9.999862751990697e-07, + "logits/chosen": -0.4244321882724762, + "logits/rejected": -0.4366786777973175, + "logps/chosen": -0.7910157442092896, + "logps/rejected": -0.8630884885787964, + "loss": 1.3166, + "rewards/accuracies": 0.5546875, + "rewards/chosen": -1.582031488418579, + "rewards/margins": 0.14414538443088531, + "rewards/rejected": -1.7261769771575928, + "step": 49 + }, + { + "epoch": 0.10571522960026429, + "grad_norm": 0.5333903431892395, + "learning_rate": 9.999451015497595e-07, + "logits/chosen": -0.389942467212677, + "logits/rejected": -0.36674585938453674, + "logps/chosen": -0.7312074899673462, + "logps/rejected": -0.7289648652076721, + "loss": 1.4225, + "rewards/accuracies": 0.4765625, + "rewards/chosen": -1.4624149799346924, + "rewards/margins": -0.004485193639993668, + "rewards/rejected": -1.4579297304153442, + "step": 50 + }, + { + "epoch": 0.10782953419226958, + "grad_norm": 0.5712242722511292, + "learning_rate": 9.9987648131247e-07, + "logits/chosen": -0.4622853994369507, + "logits/rejected": -0.3728552460670471, + "logps/chosen": -0.8764299750328064, + "logps/rejected": -0.869678795337677, + "loss": 1.4542, + "rewards/accuracies": 0.53125, + "rewards/chosen": -1.7528599500656128, + "rewards/margins": -0.013502337038516998, + "rewards/rejected": -1.739357590675354, + "step": 51 + }, + { + "epoch": 0.10994383878427486, + "grad_norm": 0.2586441934108734, + "learning_rate": 9.99780418254397e-07, + "logits/chosen": -0.37249019742012024, + "logits/rejected": -0.3998304605484009, + "logps/chosen": -0.8435611724853516, + "logps/rejected": -0.9359882473945618, + "loss": 1.3057, + "rewards/accuracies": 0.625, + "rewards/chosen": -1.6871223449707031, + "rewards/margins": 0.18485431373119354, + "rewards/rejected": -1.8719764947891235, + "step": 52 + }, + { + "epoch": 0.11205814337628014, + "grad_norm": 1.0829113721847534, + "learning_rate": 9.996569176493268e-07, + "logits/chosen": -0.47697725892066956, + "logits/rejected": -0.4208195209503174, + "logps/chosen": -0.8014968037605286, + "logps/rejected": -0.8703804612159729, + "loss": 1.3523, + "rewards/accuracies": 0.5703125, + "rewards/chosen": -1.6029936075210571, + "rewards/margins": 0.1377674788236618, + "rewards/rejected": -1.7407609224319458, + "step": 53 + }, + { + "epoch": 0.11417244796828543, + "grad_norm": 0.5523208379745483, + "learning_rate": 9.995059862773438e-07, + "logits/chosen": -0.40533363819122314, + "logits/rejected": -0.36801978945732117, + "logps/chosen": -0.7641825675964355, + "logps/rejected": -0.8168596029281616, + "loss": 1.3692, + "rewards/accuracies": 0.53125, + "rewards/chosen": -1.528365135192871, + "rewards/margins": 0.10535416752099991, + "rewards/rejected": -1.6337192058563232, + "step": 54 + }, + { + "epoch": 0.11628675256029072, + "grad_norm": 0.614101767539978, + "learning_rate": 9.993276324244605e-07, + "logits/chosen": -0.4476906955242157, + "logits/rejected": -0.40396648645401, + "logps/chosen": -0.8706808090209961, + "logps/rejected": -0.9221430420875549, + "loss": 1.3787, + "rewards/accuracies": 0.515625, + "rewards/chosen": -1.7413616180419922, + "rewards/margins": 0.10292442888021469, + "rewards/rejected": -1.8442860841751099, + "step": 55 + }, + { + "epoch": 0.118401057152296, + "grad_norm": 0.3428778052330017, + "learning_rate": 9.991218658821608e-07, + "logits/chosen": -0.31709593534469604, + "logits/rejected": -0.2760937213897705, + "logps/chosen": -0.842248797416687, + "logps/rejected": -0.8068034648895264, + "loss": 1.498, + "rewards/accuracies": 0.4375, + "rewards/chosen": -1.684497594833374, + "rewards/margins": -0.07089066505432129, + "rewards/rejected": -1.6136069297790527, + "step": 56 + }, + { + "epoch": 0.12051536174430129, + "grad_norm": 0.6877723932266235, + "learning_rate": 9.988886979468643e-07, + "logits/chosen": -0.41800016164779663, + "logits/rejected": -0.4011584222316742, + "logps/chosen": -0.7845420837402344, + "logps/rejected": -0.834447979927063, + "loss": 1.3491, + "rewards/accuracies": 0.546875, + "rewards/chosen": -1.5690841674804688, + "rewards/margins": 0.09981165081262589, + "rewards/rejected": -1.668895959854126, + "step": 57 + }, + { + "epoch": 0.12262966633630658, + "grad_norm": 0.9649701714515686, + "learning_rate": 9.98628141419305e-07, + "logits/chosen": -0.4253537058830261, + "logits/rejected": -0.4305458962917328, + "logps/chosen": -0.86476731300354, + "logps/rejected": -0.9080386161804199, + "loss": 1.3639, + "rewards/accuracies": 0.515625, + "rewards/chosen": -1.72953462600708, + "rewards/margins": 0.08654248714447021, + "rewards/rejected": -1.8160772323608398, + "step": 58 + }, + { + "epoch": 0.12474397092831185, + "grad_norm": 1.3779780864715576, + "learning_rate": 9.98340210603829e-07, + "logits/chosen": -0.39970022439956665, + "logits/rejected": -0.441428005695343, + "logps/chosen": -0.8662775158882141, + "logps/rejected": -0.9646260738372803, + "loss": 1.3001, + "rewards/accuracies": 0.6484375, + "rewards/chosen": -1.7325550317764282, + "rewards/margins": 0.19669723510742188, + "rewards/rejected": -1.9292521476745605, + "step": 59 + }, + { + "epoch": 0.12685827552031714, + "grad_norm": 0.5366966724395752, + "learning_rate": 9.980249213076084e-07, + "logits/chosen": -0.37770116329193115, + "logits/rejected": -0.35231757164001465, + "logps/chosen": -0.8165755867958069, + "logps/rejected": -0.8619179129600525, + "loss": 1.3699, + "rewards/accuracies": 0.578125, + "rewards/chosen": -1.6331511735916138, + "rewards/margins": 0.09068439900875092, + "rewards/rejected": -1.723835825920105, + "step": 60 + }, + { + "epoch": 0.12897258011232243, + "grad_norm": 0.36810922622680664, + "learning_rate": 9.976822908397748e-07, + "logits/chosen": -0.4224976897239685, + "logits/rejected": -0.41758257150650024, + "logps/chosen": -0.8445641994476318, + "logps/rejected": -0.9393664598464966, + "loss": 1.3193, + "rewards/accuracies": 0.5390625, + "rewards/chosen": -1.6891283988952637, + "rewards/margins": 0.18960458040237427, + "rewards/rejected": -1.8787329196929932, + "step": 61 + }, + { + "epoch": 0.13108688470432772, + "grad_norm": 0.6838279366493225, + "learning_rate": 9.97312338010468e-07, + "logits/chosen": -0.4168627858161926, + "logits/rejected": -0.36115381121635437, + "logps/chosen": -0.8370552659034729, + "logps/rejected": -0.8352169394493103, + "loss": 1.4284, + "rewards/accuracies": 0.5703125, + "rewards/chosen": -1.6741105318069458, + "rewards/margins": -0.0036766715347766876, + "rewards/rejected": -1.6704338788986206, + "step": 62 + }, + { + "epoch": 0.133201189296333, + "grad_norm": 0.39330533146858215, + "learning_rate": 9.969150831298037e-07, + "logits/chosen": -0.4558233618736267, + "logits/rejected": -0.4025765061378479, + "logps/chosen": -0.826255738735199, + "logps/rejected": -0.894213080406189, + "loss": 1.3485, + "rewards/accuracies": 0.5, + "rewards/chosen": -1.652511477470398, + "rewards/margins": 0.13591471314430237, + "rewards/rejected": -1.788426160812378, + "step": 63 + }, + { + "epoch": 0.1353154938883383, + "grad_norm": 0.6055929660797119, + "learning_rate": 9.964905480067584e-07, + "logits/chosen": -0.459463506937027, + "logits/rejected": -0.42943331599235535, + "logps/chosen": -0.7901928424835205, + "logps/rejected": -0.7964221239089966, + "loss": 1.4057, + "rewards/accuracies": 0.484375, + "rewards/chosen": -1.580385684967041, + "rewards/margins": 0.012458762153983116, + "rewards/rejected": -1.5928442478179932, + "step": 64 + }, + { + "epoch": 0.13742979848034356, + "grad_norm": 0.37883859872817993, + "learning_rate": 9.960387559479725e-07, + "logits/chosen": -0.4447207450866699, + "logits/rejected": -0.371269553899765, + "logps/chosen": -0.7863065004348755, + "logps/rejected": -0.7983666658401489, + "loss": 1.4202, + "rewards/accuracies": 0.4609375, + "rewards/chosen": -1.572613000869751, + "rewards/margins": 0.02412020042538643, + "rewards/rejected": -1.5967333316802979, + "step": 65 + }, + { + "epoch": 0.13954410307234885, + "grad_norm": 0.31330156326293945, + "learning_rate": 9.955597317564703e-07, + "logits/chosen": -0.42059677839279175, + "logits/rejected": -0.37605100870132446, + "logps/chosen": -0.7669360637664795, + "logps/rejected": -0.8348797559738159, + "loss": 1.3368, + "rewards/accuracies": 0.5390625, + "rewards/chosen": -1.533872127532959, + "rewards/margins": 0.13588732481002808, + "rewards/rejected": -1.6697595119476318, + "step": 66 + }, + { + "epoch": 0.14165840766435414, + "grad_norm": 0.4353170096874237, + "learning_rate": 9.950535017302983e-07, + "logits/chosen": -0.3897082805633545, + "logits/rejected": -0.38229796290397644, + "logps/chosen": -0.7249190807342529, + "logps/rejected": -0.7696882486343384, + "loss": 1.3511, + "rewards/accuracies": 0.5546875, + "rewards/chosen": -1.4498381614685059, + "rewards/margins": 0.08953814208507538, + "rewards/rejected": -1.5393764972686768, + "step": 67 + }, + { + "epoch": 0.14377271225635943, + "grad_norm": 0.6724106669425964, + "learning_rate": 9.94520093661082e-07, + "logits/chosen": -0.3687596023082733, + "logits/rejected": -0.34222811460494995, + "logps/chosen": -0.7845972776412964, + "logps/rejected": -0.8308086395263672, + "loss": 1.3737, + "rewards/accuracies": 0.5, + "rewards/chosen": -1.5691945552825928, + "rewards/margins": 0.09242270141839981, + "rewards/rejected": -1.6616172790527344, + "step": 68 + }, + { + "epoch": 0.14588701684836472, + "grad_norm": 0.7312172651290894, + "learning_rate": 9.939595368324994e-07, + "logits/chosen": -0.4475817382335663, + "logits/rejected": -0.3975730538368225, + "logps/chosen": -0.7314785718917847, + "logps/rejected": -0.7924487590789795, + "loss": 1.3439, + "rewards/accuracies": 0.5390625, + "rewards/chosen": -1.4629571437835693, + "rewards/margins": 0.12194043397903442, + "rewards/rejected": -1.584897518157959, + "step": 69 + }, + { + "epoch": 0.14800132144037, + "grad_norm": 0.301097571849823, + "learning_rate": 9.933718620186744e-07, + "logits/chosen": -0.402032732963562, + "logits/rejected": -0.3640722632408142, + "logps/chosen": -0.7727882862091064, + "logps/rejected": -0.8291516304016113, + "loss": 1.358, + "rewards/accuracies": 0.5546875, + "rewards/chosen": -1.545576572418213, + "rewards/margins": 0.11272668838500977, + "rewards/rejected": -1.6583032608032227, + "step": 70 + }, + { + "epoch": 0.1501156260323753, + "grad_norm": 0.377835750579834, + "learning_rate": 9.92757101482486e-07, + "logits/chosen": -0.316825270652771, + "logits/rejected": -0.3245603144168854, + "logps/chosen": -0.7962774634361267, + "logps/rejected": -0.8610175848007202, + "loss": 1.3464, + "rewards/accuracies": 0.5859375, + "rewards/chosen": -1.5925549268722534, + "rewards/margins": 0.12948019802570343, + "rewards/rejected": -1.7220351696014404, + "step": 71 + }, + { + "epoch": 0.1522299306243806, + "grad_norm": 0.84058678150177, + "learning_rate": 9.921152889737984e-07, + "logits/chosen": -0.4446060359477997, + "logits/rejected": -0.43160340189933777, + "logps/chosen": -0.7745426297187805, + "logps/rejected": -0.8286185264587402, + "loss": 1.3408, + "rewards/accuracies": 0.5703125, + "rewards/chosen": -1.549085259437561, + "rewards/margins": 0.10815180093050003, + "rewards/rejected": -1.6572370529174805, + "step": 72 + }, + { + "epoch": 0.15434423521638585, + "grad_norm": 0.6970808506011963, + "learning_rate": 9.91446459727607e-07, + "logits/chosen": -0.4220297634601593, + "logits/rejected": -0.404453307390213, + "logps/chosen": -0.8769615888595581, + "logps/rejected": -0.9564313292503357, + "loss": 1.3423, + "rewards/accuracies": 0.6328125, + "rewards/chosen": -1.7539231777191162, + "rewards/margins": 0.15893957018852234, + "rewards/rejected": -1.9128626585006714, + "step": 73 + }, + { + "epoch": 0.15645853980839114, + "grad_norm": 0.5900676250457764, + "learning_rate": 9.90750650462105e-07, + "logits/chosen": -0.41884100437164307, + "logits/rejected": -0.38551777601242065, + "logps/chosen": -0.814996063709259, + "logps/rejected": -0.8892688751220703, + "loss": 1.3325, + "rewards/accuracies": 0.5546875, + "rewards/chosen": -1.629992127418518, + "rewards/margins": 0.14854571223258972, + "rewards/rejected": -1.7785377502441406, + "step": 74 + }, + { + "epoch": 0.15857284440039643, + "grad_norm": 0.7245749831199646, + "learning_rate": 9.900278993766668e-07, + "logits/chosen": -0.3451727330684662, + "logits/rejected": -0.3348972201347351, + "logps/chosen": -0.8788102865219116, + "logps/rejected": -0.9273182153701782, + "loss": 1.377, + "rewards/accuracies": 0.5078125, + "rewards/chosen": -1.7576205730438232, + "rewards/margins": 0.09701582789421082, + "rewards/rejected": -1.8546364307403564, + "step": 75 + }, + { + "epoch": 0.16068714899240172, + "grad_norm": 0.3135383725166321, + "learning_rate": 9.89278246149752e-07, + "logits/chosen": -0.4140404760837555, + "logits/rejected": -0.38082340359687805, + "logps/chosen": -0.7513999342918396, + "logps/rejected": -0.7880118489265442, + "loss": 1.3921, + "rewards/accuracies": 0.53125, + "rewards/chosen": -1.5027998685836792, + "rewards/margins": 0.07322371751070023, + "rewards/rejected": -1.5760236978530884, + "step": 76 + }, + { + "epoch": 0.162801453584407, + "grad_norm": 0.8594076633453369, + "learning_rate": 9.885017319367252e-07, + "logits/chosen": -0.35951656103134155, + "logits/rejected": -0.30456626415252686, + "logps/chosen": -0.7989844679832458, + "logps/rejected": -0.8145395517349243, + "loss": 1.4061, + "rewards/accuracies": 0.4765625, + "rewards/chosen": -1.5979689359664917, + "rewards/margins": 0.031110182404518127, + "rewards/rejected": -1.6290791034698486, + "step": 77 + }, + { + "epoch": 0.1649157581764123, + "grad_norm": 0.4393538534641266, + "learning_rate": 9.876983993675989e-07, + "logits/chosen": -0.33119240403175354, + "logits/rejected": -0.31970253586769104, + "logps/chosen": -0.721772313117981, + "logps/rejected": -0.7868390679359436, + "loss": 1.3325, + "rewards/accuracies": 0.5390625, + "rewards/chosen": -1.443544626235962, + "rewards/margins": 0.13013358414173126, + "rewards/rejected": -1.5736781358718872, + "step": 78 + }, + { + "epoch": 0.1670300627684176, + "grad_norm": 0.8017925024032593, + "learning_rate": 9.868682925446909e-07, + "logits/chosen": -0.3608989417552948, + "logits/rejected": -0.3497124910354614, + "logps/chosen": -0.8117240071296692, + "logps/rejected": -0.8506529331207275, + "loss": 1.3671, + "rewards/accuracies": 0.53125, + "rewards/chosen": -1.6234480142593384, + "rewards/margins": 0.07785768806934357, + "rewards/rejected": -1.701305866241455, + "step": 79 + }, + { + "epoch": 0.16914436736042285, + "grad_norm": 0.5704994201660156, + "learning_rate": 9.860114570402054e-07, + "logits/chosen": -0.37441548705101013, + "logits/rejected": -0.32188406586647034, + "logps/chosen": -0.745419442653656, + "logps/rejected": -0.816170871257782, + "loss": 1.3315, + "rewards/accuracies": 0.484375, + "rewards/chosen": -1.490838885307312, + "rewards/margins": 0.1415030062198639, + "rewards/rejected": -1.632341742515564, + "step": 80 + }, + { + "epoch": 0.17125867195242814, + "grad_norm": 3.0005106925964355, + "learning_rate": 9.85127939893729e-07, + "logits/chosen": -0.34791454672813416, + "logits/rejected": -0.32542383670806885, + "logps/chosen": -0.7547991275787354, + "logps/rejected": -0.7868378162384033, + "loss": 1.3742, + "rewards/accuracies": 0.515625, + "rewards/chosen": -1.5095982551574707, + "rewards/margins": 0.06407731771469116, + "rewards/rejected": -1.5736756324768066, + "step": 81 + }, + { + "epoch": 0.17337297654443343, + "grad_norm": 0.5891271829605103, + "learning_rate": 9.842177896096493e-07, + "logits/chosen": -0.38649702072143555, + "logits/rejected": -0.36892226338386536, + "logps/chosen": -0.7556143999099731, + "logps/rejected": -0.82858806848526, + "loss": 1.3233, + "rewards/accuracies": 0.5703125, + "rewards/chosen": -1.5112287998199463, + "rewards/margins": 0.14594702422618866, + "rewards/rejected": -1.65717613697052, + "step": 82 + }, + { + "epoch": 0.17548728113643872, + "grad_norm": 0.27861157059669495, + "learning_rate": 9.832810561544923e-07, + "logits/chosen": -0.38264670968055725, + "logits/rejected": -0.35908499360084534, + "logps/chosen": -0.7858557105064392, + "logps/rejected": -0.8571599721908569, + "loss": 1.3234, + "rewards/accuracies": 0.578125, + "rewards/chosen": -1.5717114210128784, + "rewards/margins": 0.1426086127758026, + "rewards/rejected": -1.7143199443817139, + "step": 83 + }, + { + "epoch": 0.177601585728444, + "grad_norm": 0.8158763647079468, + "learning_rate": 9.823177909541793e-07, + "logits/chosen": -0.4076104760169983, + "logits/rejected": -0.3934200704097748, + "logps/chosen": -0.8089872002601624, + "logps/rejected": -0.8885407447814941, + "loss": 1.3476, + "rewards/accuracies": 0.5390625, + "rewards/chosen": -1.6179744005203247, + "rewards/margins": 0.15910708904266357, + "rewards/rejected": -1.7770814895629883, + "step": 84 + }, + { + "epoch": 0.1797158903204493, + "grad_norm": 0.46008333563804626, + "learning_rate": 9.813280468912022e-07, + "logits/chosen": -0.33124151825904846, + "logits/rejected": -0.34535717964172363, + "logps/chosen": -0.733020544052124, + "logps/rejected": -0.8716557621955872, + "loss": 1.2807, + "rewards/accuracies": 0.5859375, + "rewards/chosen": -1.466041088104248, + "rewards/margins": 0.27727028727531433, + "rewards/rejected": -1.7433115243911743, + "step": 85 + }, + { + "epoch": 0.18183019491245458, + "grad_norm": 0.3784334659576416, + "learning_rate": 9.80311878301722e-07, + "logits/chosen": -0.40713849663734436, + "logits/rejected": -0.3808574080467224, + "logps/chosen": -0.7063947319984436, + "logps/rejected": -0.7589148879051208, + "loss": 1.3501, + "rewards/accuracies": 0.515625, + "rewards/chosen": -1.4127894639968872, + "rewards/margins": 0.10504024475812912, + "rewards/rejected": -1.5178297758102417, + "step": 86 + }, + { + "epoch": 0.18394449950445987, + "grad_norm": 0.6137813329696655, + "learning_rate": 9.792693409725853e-07, + "logits/chosen": -0.4119255542755127, + "logits/rejected": -0.44221603870391846, + "logps/chosen": -0.795850932598114, + "logps/rejected": -0.8925026059150696, + "loss": 1.2987, + "rewards/accuracies": 0.5859375, + "rewards/chosen": -1.591701865196228, + "rewards/margins": 0.19330324232578278, + "rewards/rejected": -1.7850052118301392, + "step": 87 + }, + { + "epoch": 0.18605880409646514, + "grad_norm": 0.5354250073432922, + "learning_rate": 9.78200492138261e-07, + "logits/chosen": -0.3792279064655304, + "logits/rejected": -0.3789527714252472, + "logps/chosen": -0.7249161005020142, + "logps/rejected": -0.8088154792785645, + "loss": 1.3304, + "rewards/accuracies": 0.515625, + "rewards/chosen": -1.4498322010040283, + "rewards/margins": 0.16779886186122894, + "rewards/rejected": -1.617630958557129, + "step": 88 + }, + { + "epoch": 0.18817310868847043, + "grad_norm": 0.6911243796348572, + "learning_rate": 9.771053904776995e-07, + "logits/chosen": -0.38837429881095886, + "logits/rejected": -0.36597418785095215, + "logps/chosen": -0.7528612017631531, + "logps/rejected": -0.7981135249137878, + "loss": 1.3481, + "rewards/accuracies": 0.6171875, + "rewards/chosen": -1.5057224035263062, + "rewards/margins": 0.09050464630126953, + "rewards/rejected": -1.5962270498275757, + "step": 89 + }, + { + "epoch": 0.19028741328047571, + "grad_norm": 0.37110790610313416, + "learning_rate": 9.759840961111097e-07, + "logits/chosen": -0.3804919421672821, + "logits/rejected": -0.38750600814819336, + "logps/chosen": -0.8673248291015625, + "logps/rejected": -0.9381619691848755, + "loss": 1.3303, + "rewards/accuracies": 0.5859375, + "rewards/chosen": -1.734649658203125, + "rewards/margins": 0.14167429506778717, + "rewards/rejected": -1.876323938369751, + "step": 90 + }, + { + "epoch": 0.192401717872481, + "grad_norm": 0.8033086657524109, + "learning_rate": 9.748366705966593e-07, + "logits/chosen": -0.3804866075515747, + "logits/rejected": -0.31055447459220886, + "logps/chosen": -0.7535511255264282, + "logps/rejected": -0.7824290990829468, + "loss": 1.3706, + "rewards/accuracies": 0.546875, + "rewards/chosen": -1.5071022510528564, + "rewards/margins": 0.057755980640649796, + "rewards/rejected": -1.5648581981658936, + "step": 91 + }, + { + "epoch": 0.1945160224644863, + "grad_norm": 0.6934167742729187, + "learning_rate": 9.736631769270957e-07, + "logits/chosen": -0.443461149930954, + "logits/rejected": -0.4398806691169739, + "logps/chosen": -0.8123858571052551, + "logps/rejected": -0.8972252607345581, + "loss": 1.3464, + "rewards/accuracies": 0.578125, + "rewards/chosen": -1.6247717142105103, + "rewards/margins": 0.16967862844467163, + "rewards/rejected": -1.7944505214691162, + "step": 92 + }, + { + "epoch": 0.19663032705649158, + "grad_norm": 1.140067458152771, + "learning_rate": 9.724636795262866e-07, + "logits/chosen": -0.43793433904647827, + "logits/rejected": -0.4402340352535248, + "logps/chosen": -0.8155819177627563, + "logps/rejected": -0.8659977912902832, + "loss": 1.3621, + "rewards/accuracies": 0.5, + "rewards/chosen": -1.6311638355255127, + "rewards/margins": 0.10083187371492386, + "rewards/rejected": -1.7319955825805664, + "step": 93 + }, + { + "epoch": 0.19874463164849687, + "grad_norm": 1.8303897380828857, + "learning_rate": 9.712382442456844e-07, + "logits/chosen": -0.34288379549980164, + "logits/rejected": -0.36632782220840454, + "logps/chosen": -0.7338054776191711, + "logps/rejected": -0.8537961840629578, + "loss": 1.2942, + "rewards/accuracies": 0.5859375, + "rewards/chosen": -1.4676109552383423, + "rewards/margins": 0.23998141288757324, + "rewards/rejected": -1.7075923681259155, + "step": 94 + }, + { + "epoch": 0.20085893624050213, + "grad_norm": 0.34392252564430237, + "learning_rate": 9.6998693836071e-07, + "logits/chosen": -0.4381723999977112, + "logits/rejected": -0.4031081199645996, + "logps/chosen": -0.7130292057991028, + "logps/rejected": -0.7402217388153076, + "loss": 1.3766, + "rewards/accuracies": 0.5859375, + "rewards/chosen": -1.4260584115982056, + "rewards/margins": 0.0543849840760231, + "rewards/rejected": -1.4804434776306152, + "step": 95 + }, + { + "epoch": 0.20297324083250742, + "grad_norm": 0.4129842519760132, + "learning_rate": 9.687098305670604e-07, + "logits/chosen": -0.39796924591064453, + "logits/rejected": -0.3476859927177429, + "logps/chosen": -0.7520885467529297, + "logps/rejected": -0.8058558702468872, + "loss": 1.3663, + "rewards/accuracies": 0.53125, + "rewards/chosen": -1.5041770935058594, + "rewards/margins": 0.10753461718559265, + "rewards/rejected": -1.6117117404937744, + "step": 96 + }, + { + "epoch": 0.2050875454245127, + "grad_norm": 0.5054985284805298, + "learning_rate": 9.674069909769362e-07, + "logits/chosen": -0.3942393660545349, + "logits/rejected": -0.3627544045448303, + "logps/chosen": -0.733702540397644, + "logps/rejected": -0.781308650970459, + "loss": 1.342, + "rewards/accuracies": 0.5859375, + "rewards/chosen": -1.467405080795288, + "rewards/margins": 0.09521210938692093, + "rewards/rejected": -1.562617301940918, + "step": 97 + }, + { + "epoch": 0.207201850016518, + "grad_norm": 0.6975870728492737, + "learning_rate": 9.66078491115194e-07, + "logits/chosen": -0.38557127118110657, + "logits/rejected": -0.3581204414367676, + "logps/chosen": -0.7359838485717773, + "logps/rejected": -0.7648134827613831, + "loss": 1.3841, + "rewards/accuracies": 0.5234375, + "rewards/chosen": -1.4719676971435547, + "rewards/margins": 0.057659298181533813, + "rewards/rejected": -1.5296269655227661, + "step": 98 + }, + { + "epoch": 0.2093161546085233, + "grad_norm": 0.7575029730796814, + "learning_rate": 9.647244039154177e-07, + "logits/chosen": -0.3871467411518097, + "logits/rejected": -0.3941374123096466, + "logps/chosen": -0.6516871452331543, + "logps/rejected": -0.7066073417663574, + "loss": 1.3364, + "rewards/accuracies": 0.5390625, + "rewards/chosen": -1.3033742904663086, + "rewards/margins": 0.10984040796756744, + "rewards/rejected": -1.4132146835327148, + "step": 99 + }, + { + "epoch": 0.21143045920052858, + "grad_norm": 1.3344250917434692, + "learning_rate": 9.633448037159166e-07, + "logits/chosen": -0.40887755155563354, + "logits/rejected": -0.41733911633491516, + "logps/chosen": -0.6978950500488281, + "logps/rejected": -0.793424129486084, + "loss": 1.3076, + "rewards/accuracies": 0.609375, + "rewards/chosen": -1.3957901000976562, + "rewards/margins": 0.1910584717988968, + "rewards/rejected": -1.586848258972168, + "step": 100 + }, + { + "epoch": 0.21354476379253387, + "grad_norm": 0.8798456788063049, + "learning_rate": 9.619397662556433e-07, + "logits/chosen": -0.302534282207489, + "logits/rejected": -0.29954588413238525, + "logps/chosen": -0.719552755355835, + "logps/rejected": -0.7628123164176941, + "loss": 1.3699, + "rewards/accuracies": 0.5390625, + "rewards/chosen": -1.43910551071167, + "rewards/margins": 0.08651915192604065, + "rewards/rejected": -1.5256246328353882, + "step": 101 + }, + { + "epoch": 0.21565906838453916, + "grad_norm": 0.8746365308761597, + "learning_rate": 9.605093686700353e-07, + "logits/chosen": -0.372263640165329, + "logits/rejected": -0.3714321255683899, + "logps/chosen": -0.6665956974029541, + "logps/rejected": -0.7361368536949158, + "loss": 1.3173, + "rewards/accuracies": 0.5546875, + "rewards/chosen": -1.3331913948059082, + "rewards/margins": 0.1390824019908905, + "rewards/rejected": -1.4722737073898315, + "step": 102 + }, + { + "epoch": 0.21777337297654442, + "grad_norm": 0.8450930714607239, + "learning_rate": 9.590536894867812e-07, + "logits/chosen": -0.37228280305862427, + "logits/rejected": -0.37763556838035583, + "logps/chosen": -0.7425979375839233, + "logps/rejected": -0.7557005882263184, + "loss": 1.4085, + "rewards/accuracies": 0.5390625, + "rewards/chosen": -1.4851958751678467, + "rewards/margins": 0.026205357164144516, + "rewards/rejected": -1.5114011764526367, + "step": 103 + }, + { + "epoch": 0.2198876775685497, + "grad_norm": 0.5075035691261292, + "learning_rate": 9.575728086215091e-07, + "logits/chosen": -0.4433964788913727, + "logits/rejected": -0.3782787024974823, + "logps/chosen": -0.7308244109153748, + "logps/rejected": -0.8043883442878723, + "loss": 1.3353, + "rewards/accuracies": 0.5703125, + "rewards/chosen": -1.4616488218307495, + "rewards/margins": 0.14712783694267273, + "rewards/rejected": -1.6087766885757446, + "step": 104 + }, + { + "epoch": 0.222001982160555, + "grad_norm": 1.0270946025848389, + "learning_rate": 9.560668073733993e-07, + "logits/chosen": -0.3593980073928833, + "logits/rejected": -0.3159312903881073, + "logps/chosen": -0.757469892501831, + "logps/rejected": -0.8256179094314575, + "loss": 1.3289, + "rewards/accuracies": 0.5546875, + "rewards/chosen": -1.514939785003662, + "rewards/margins": 0.13629598915576935, + "rewards/rejected": -1.651235818862915, + "step": 105 + }, + { + "epoch": 0.2241162867525603, + "grad_norm": 0.8390078544616699, + "learning_rate": 9.54535768420721e-07, + "logits/chosen": -0.3266332149505615, + "logits/rejected": -0.3008713722229004, + "logps/chosen": -0.7286102771759033, + "logps/rejected": -0.7803273797035217, + "loss": 1.3593, + "rewards/accuracies": 0.5078125, + "rewards/chosen": -1.4572205543518066, + "rewards/margins": 0.10343428701162338, + "rewards/rejected": -1.5606547594070435, + "step": 106 + }, + { + "epoch": 0.22623059134456558, + "grad_norm": 1.130595088005066, + "learning_rate": 9.529797758162934e-07, + "logits/chosen": -0.36109817028045654, + "logits/rejected": -0.34797021746635437, + "logps/chosen": -0.7723361253738403, + "logps/rejected": -0.8873662352561951, + "loss": 1.2956, + "rewards/accuracies": 0.625, + "rewards/chosen": -1.5446722507476807, + "rewards/margins": 0.23006024956703186, + "rewards/rejected": -1.7747324705123901, + "step": 107 + }, + { + "epoch": 0.22834489593657087, + "grad_norm": 0.9399718642234802, + "learning_rate": 9.513989149828717e-07, + "logits/chosen": -0.3596777021884918, + "logits/rejected": -0.3660539388656616, + "logps/chosen": -0.7130635976791382, + "logps/rejected": -0.7378955483436584, + "loss": 1.3774, + "rewards/accuracies": 0.4765625, + "rewards/chosen": -1.4261271953582764, + "rewards/margins": 0.04966379329562187, + "rewards/rejected": -1.475791096687317, + "step": 108 + }, + { + "epoch": 0.23045920052857616, + "grad_norm": 1.097594976425171, + "learning_rate": 9.49793272708457e-07, + "logits/chosen": -0.31783169507980347, + "logits/rejected": -0.3008044362068176, + "logps/chosen": -0.6933202147483826, + "logps/rejected": -0.7510000467300415, + "loss": 1.3498, + "rewards/accuracies": 0.5390625, + "rewards/chosen": -1.3866404294967651, + "rewards/margins": 0.1153596043586731, + "rewards/rejected": -1.502000093460083, + "step": 109 + }, + { + "epoch": 0.23257350512058145, + "grad_norm": 1.096330165863037, + "learning_rate": 9.481629371415313e-07, + "logits/chosen": -0.3582899570465088, + "logits/rejected": -0.3120020031929016, + "logps/chosen": -0.817268431186676, + "logps/rejected": -0.8862374424934387, + "loss": 1.3349, + "rewards/accuracies": 0.5625, + "rewards/chosen": -1.634536862373352, + "rewards/margins": 0.13793781399726868, + "rewards/rejected": -1.7724748849868774, + "step": 110 + }, + { + "epoch": 0.2346878097125867, + "grad_norm": 0.8261978626251221, + "learning_rate": 9.465079977862192e-07, + "logits/chosen": -0.41336673498153687, + "logits/rejected": -0.39544352889060974, + "logps/chosen": -0.7673372030258179, + "logps/rejected": -0.8331737518310547, + "loss": 1.3373, + "rewards/accuracies": 0.59375, + "rewards/chosen": -1.5346744060516357, + "rewards/margins": 0.1316729635000229, + "rewards/rejected": -1.6663475036621094, + "step": 111 + }, + { + "epoch": 0.236802114304592, + "grad_norm": 0.5922806262969971, + "learning_rate": 9.448285454973737e-07, + "logits/chosen": -0.3224758207798004, + "logits/rejected": -0.3118049204349518, + "logps/chosen": -0.7584627866744995, + "logps/rejected": -0.8859898447990417, + "loss": 1.2731, + "rewards/accuracies": 0.625, + "rewards/chosen": -1.516925573348999, + "rewards/margins": 0.2550540566444397, + "rewards/rejected": -1.7719796895980835, + "step": 112 + }, + { + "epoch": 0.2389164188965973, + "grad_norm": 1.3172541856765747, + "learning_rate": 9.431246724755877e-07, + "logits/chosen": -0.4287208318710327, + "logits/rejected": -0.3984590172767639, + "logps/chosen": -0.7587048411369324, + "logps/rejected": -0.7860502004623413, + "loss": 1.3832, + "rewards/accuracies": 0.4765625, + "rewards/chosen": -1.5174096822738647, + "rewards/margins": 0.05469079315662384, + "rewards/rejected": -1.5721004009246826, + "step": 113 + }, + { + "epoch": 0.24103072348860258, + "grad_norm": 0.7749882340431213, + "learning_rate": 9.413964722621337e-07, + "logits/chosen": -0.39085906744003296, + "logits/rejected": -0.3316206932067871, + "logps/chosen": -0.7035898566246033, + "logps/rejected": -0.7375759482383728, + "loss": 1.387, + "rewards/accuracies": 0.5546875, + "rewards/chosen": -1.4071797132492065, + "rewards/margins": 0.06797221302986145, + "rewards/rejected": -1.4751518964767456, + "step": 114 + }, + { + "epoch": 0.24314502808060787, + "grad_norm": 1.0914056301116943, + "learning_rate": 9.396440397338272e-07, + "logits/chosen": -0.38826486468315125, + "logits/rejected": -0.35520774126052856, + "logps/chosen": -0.7385872602462769, + "logps/rejected": -0.7974889278411865, + "loss": 1.3477, + "rewards/accuracies": 0.5, + "rewards/chosen": -1.4771745204925537, + "rewards/margins": 0.11780343949794769, + "rewards/rejected": -1.594977855682373, + "step": 115 + }, + { + "epoch": 0.24525933267261316, + "grad_norm": 1.2966018915176392, + "learning_rate": 9.378674710978183e-07, + "logits/chosen": -0.36493802070617676, + "logits/rejected": -0.34763696789741516, + "logps/chosen": -0.6731826663017273, + "logps/rejected": -0.7645149827003479, + "loss": 1.3, + "rewards/accuracies": 0.5859375, + "rewards/chosen": -1.3463653326034546, + "rewards/margins": 0.18266455829143524, + "rewards/rejected": -1.5290299654006958, + "step": 116 + }, + { + "epoch": 0.24737363726461845, + "grad_norm": 0.49401605129241943, + "learning_rate": 9.360668638863109e-07, + "logits/chosen": -0.40416795015335083, + "logits/rejected": -0.3815993070602417, + "logps/chosen": -0.719497799873352, + "logps/rejected": -0.7588324546813965, + "loss": 1.3621, + "rewards/accuracies": 0.5625, + "rewards/chosen": -1.438995599746704, + "rewards/margins": 0.07866920530796051, + "rewards/rejected": -1.517664909362793, + "step": 117 + }, + { + "epoch": 0.2494879418566237, + "grad_norm": 1.0603238344192505, + "learning_rate": 9.342423169512071e-07, + "logits/chosen": -0.3857055604457855, + "logits/rejected": -0.3524513244628906, + "logps/chosen": -0.7373769283294678, + "logps/rejected": -0.7971038818359375, + "loss": 1.3358, + "rewards/accuracies": 0.59375, + "rewards/chosen": -1.4747538566589355, + "rewards/margins": 0.11945393681526184, + "rewards/rejected": -1.594207763671875, + "step": 118 + }, + { + "epoch": 0.251602246448629, + "grad_norm": 0.9880490303039551, + "learning_rate": 9.323939304586804e-07, + "logits/chosen": -0.31455785036087036, + "logits/rejected": -0.3102484941482544, + "logps/chosen": -0.7276102900505066, + "logps/rejected": -0.7446941137313843, + "loss": 1.3928, + "rewards/accuracies": 0.546875, + "rewards/chosen": -1.4552205801010132, + "rewards/margins": 0.034167706966400146, + "rewards/rejected": -1.4893882274627686, + "step": 119 + }, + { + "epoch": 0.2537165510406343, + "grad_norm": 1.0419566631317139, + "learning_rate": 9.305218058836776e-07, + "logits/chosen": -0.38093918561935425, + "logits/rejected": -0.3588898181915283, + "logps/chosen": -0.715582013130188, + "logps/rejected": -0.8271002769470215, + "loss": 1.2934, + "rewards/accuracies": 0.6015625, + "rewards/chosen": -1.431164026260376, + "rewards/margins": 0.22303667664527893, + "rewards/rejected": -1.654200553894043, + "step": 120 + }, + { + "epoch": 0.2558308556326396, + "grad_norm": 0.657620370388031, + "learning_rate": 9.286260460043473e-07, + "logits/chosen": -0.45690783858299255, + "logits/rejected": -0.4082674980163574, + "logps/chosen": -0.6932571530342102, + "logps/rejected": -0.7631082534790039, + "loss": 1.3398, + "rewards/accuracies": 0.5546875, + "rewards/chosen": -1.3865143060684204, + "rewards/margins": 0.13970226049423218, + "rewards/rejected": -1.5262165069580078, + "step": 121 + }, + { + "epoch": 0.25794516022464486, + "grad_norm": 0.983686089515686, + "learning_rate": 9.267067548963974e-07, + "logits/chosen": -0.40266987681388855, + "logits/rejected": -0.37586671113967896, + "logps/chosen": -0.7362720966339111, + "logps/rejected": -0.7538987398147583, + "loss": 1.4066, + "rewards/accuracies": 0.5390625, + "rewards/chosen": -1.4725441932678223, + "rewards/margins": 0.03525342047214508, + "rewards/rejected": -1.5077974796295166, + "step": 122 + }, + { + "epoch": 0.26005946481665015, + "grad_norm": 1.0076361894607544, + "learning_rate": 9.24764037927381e-07, + "logits/chosen": -0.4461461007595062, + "logits/rejected": -0.40700826048851013, + "logps/chosen": -0.7206646800041199, + "logps/rejected": -0.7489192485809326, + "loss": 1.3759, + "rewards/accuracies": 0.5625, + "rewards/chosen": -1.4413293600082397, + "rewards/margins": 0.05650928616523743, + "rewards/rejected": -1.4978384971618652, + "step": 123 + }, + { + "epoch": 0.26217376940865544, + "grad_norm": 0.933315098285675, + "learning_rate": 9.22798001750913e-07, + "logits/chosen": -0.3966676890850067, + "logits/rejected": -0.3572196960449219, + "logps/chosen": -0.7075096368789673, + "logps/rejected": -0.7406759262084961, + "loss": 1.3667, + "rewards/accuracies": 0.6171875, + "rewards/chosen": -1.4150192737579346, + "rewards/margins": 0.06633266806602478, + "rewards/rejected": -1.4813518524169922, + "step": 124 + }, + { + "epoch": 0.26428807400066073, + "grad_norm": 0.6277392506599426, + "learning_rate": 9.20808754300814e-07, + "logits/chosen": -0.3555490970611572, + "logits/rejected": -0.35786163806915283, + "logps/chosen": -0.7549921274185181, + "logps/rejected": -0.832869291305542, + "loss": 1.3175, + "rewards/accuracies": 0.5546875, + "rewards/chosen": -1.5099842548370361, + "rewards/margins": 0.15575438737869263, + "rewards/rejected": -1.665738582611084, + "step": 125 + }, + { + "epoch": 0.266402378592666, + "grad_norm": 0.7172744274139404, + "learning_rate": 9.18796404785185e-07, + "logits/chosen": -0.41230690479278564, + "logits/rejected": -0.39935630559921265, + "logps/chosen": -0.7129833698272705, + "logps/rejected": -0.7888559103012085, + "loss": 1.3167, + "rewards/accuracies": 0.5625, + "rewards/chosen": -1.425966739654541, + "rewards/margins": 0.15174514055252075, + "rewards/rejected": -1.577711820602417, + "step": 126 + }, + { + "epoch": 0.2685166831846713, + "grad_norm": 1.2629508972167969, + "learning_rate": 9.16761063680412e-07, + "logits/chosen": -0.36754000186920166, + "logits/rejected": -0.3541562259197235, + "logps/chosen": -0.6992133855819702, + "logps/rejected": -0.7668892741203308, + "loss": 1.3735, + "rewards/accuracies": 0.578125, + "rewards/chosen": -1.3984267711639404, + "rewards/margins": 0.13535188138484955, + "rewards/rejected": -1.5337785482406616, + "step": 127 + }, + { + "epoch": 0.2706309877766766, + "grad_norm": 0.7024405598640442, + "learning_rate": 9.147028427251009e-07, + "logits/chosen": -0.4014585018157959, + "logits/rejected": -0.40560898184776306, + "logps/chosen": -0.727234959602356, + "logps/rejected": -0.8070081472396851, + "loss": 1.3138, + "rewards/accuracies": 0.625, + "rewards/chosen": -1.454469919204712, + "rewards/margins": 0.15954652428627014, + "rewards/rejected": -1.6140162944793701, + "step": 128 + }, + { + "epoch": 0.2727452923686819, + "grad_norm": 1.6173532009124756, + "learning_rate": 9.126218549139433e-07, + "logits/chosen": -0.32572367787361145, + "logits/rejected": -0.3470613956451416, + "logps/chosen": -0.7555541396141052, + "logps/rejected": -0.8856738209724426, + "loss": 1.2461, + "rewards/accuracies": 0.6796875, + "rewards/chosen": -1.5111082792282104, + "rewards/margins": 0.26023951172828674, + "rewards/rejected": -1.7713476419448853, + "step": 129 + }, + { + "epoch": 0.2748595969606871, + "grad_norm": 0.5878487229347229, + "learning_rate": 9.105182144915129e-07, + "logits/chosen": -0.39267170429229736, + "logits/rejected": -0.3448992967605591, + "logps/chosen": -0.6776289343833923, + "logps/rejected": -0.7530183792114258, + "loss": 1.3242, + "rewards/accuracies": 0.609375, + "rewards/chosen": -1.3552578687667847, + "rewards/margins": 0.15077897906303406, + "rewards/rejected": -1.5060367584228516, + "step": 130 + }, + { + "epoch": 0.2769739015526924, + "grad_norm": 0.43264809250831604, + "learning_rate": 9.08392036945994e-07, + "logits/chosen": -0.39980950951576233, + "logits/rejected": -0.4247930645942688, + "logps/chosen": -0.7898982167243958, + "logps/rejected": -0.8856299519538879, + "loss": 1.3004, + "rewards/accuracies": 0.59375, + "rewards/chosen": -1.5797964334487915, + "rewards/margins": 0.19146347045898438, + "rewards/rejected": -1.7712599039077759, + "step": 131 + }, + { + "epoch": 0.2790882061446977, + "grad_norm": 1.0348538160324097, + "learning_rate": 9.062434390028407e-07, + "logits/chosen": -0.35729700326919556, + "logits/rejected": -0.3265542984008789, + "logps/chosen": -0.7120587229728699, + "logps/rejected": -0.771691083908081, + "loss": 1.3374, + "rewards/accuracies": 0.59375, + "rewards/chosen": -1.4241174459457397, + "rewards/margins": 0.11926469206809998, + "rewards/rejected": -1.543382167816162, + "step": 132 + }, + { + "epoch": 0.281202510736703, + "grad_norm": 2.0902225971221924, + "learning_rate": 9.04072538618369e-07, + "logits/chosen": -0.4942469298839569, + "logits/rejected": -0.48699846863746643, + "logps/chosen": -0.7882512211799622, + "logps/rejected": -0.8270165920257568, + "loss": 1.3715, + "rewards/accuracies": 0.5703125, + "rewards/chosen": -1.5765024423599243, + "rewards/margins": 0.07753071188926697, + "rewards/rejected": -1.6540331840515137, + "step": 133 + }, + { + "epoch": 0.2833168153287083, + "grad_norm": 1.6436113119125366, + "learning_rate": 9.018794549732817e-07, + "logits/chosen": -0.41133156418800354, + "logits/rejected": -0.4146718382835388, + "logps/chosen": -0.779824435710907, + "logps/rejected": -0.9421006441116333, + "loss": 1.2521, + "rewards/accuracies": 0.625, + "rewards/chosen": -1.559648871421814, + "rewards/margins": 0.324552446603775, + "rewards/rejected": -1.8842012882232666, + "step": 134 + }, + { + "epoch": 0.28543111992071357, + "grad_norm": 0.8831859827041626, + "learning_rate": 8.996643084661244e-07, + "logits/chosen": -0.42452165484428406, + "logits/rejected": -0.3798604905605316, + "logps/chosen": -0.6499216556549072, + "logps/rejected": -0.7796702980995178, + "loss": 1.2581, + "rewards/accuracies": 0.625, + "rewards/chosen": -1.2998433113098145, + "rewards/margins": 0.25949734449386597, + "rewards/rejected": -1.5593405961990356, + "step": 135 + }, + { + "epoch": 0.28754542451271886, + "grad_norm": 0.8031218647956848, + "learning_rate": 8.974272207066767e-07, + "logits/chosen": -0.38131940364837646, + "logits/rejected": -0.3854255676269531, + "logps/chosen": -0.7026851773262024, + "logps/rejected": -0.762391209602356, + "loss": 1.3333, + "rewards/accuracies": 0.5625, + "rewards/chosen": -1.4053703546524048, + "rewards/margins": 0.11941206455230713, + "rewards/rejected": -1.524782419204712, + "step": 136 + }, + { + "epoch": 0.28965972910472415, + "grad_norm": 1.4455821514129639, + "learning_rate": 8.951683145092748e-07, + "logits/chosen": -0.42824965715408325, + "logits/rejected": -0.4320424795150757, + "logps/chosen": -0.7893270254135132, + "logps/rejected": -0.8517144322395325, + "loss": 1.3652, + "rewards/accuracies": 0.5234375, + "rewards/chosen": -1.5786540508270264, + "rewards/margins": 0.12477481365203857, + "rewards/rejected": -1.703428864479065, + "step": 137 + }, + { + "epoch": 0.29177403369672944, + "grad_norm": 0.6299450397491455, + "learning_rate": 8.928877138860706e-07, + "logits/chosen": -0.4388589560985565, + "logits/rejected": -0.40156903862953186, + "logps/chosen": -0.7346572875976562, + "logps/rejected": -0.8166492581367493, + "loss": 1.3134, + "rewards/accuracies": 0.5703125, + "rewards/chosen": -1.4693145751953125, + "rewards/margins": 0.16398391127586365, + "rewards/rejected": -1.6332985162734985, + "step": 138 + }, + { + "epoch": 0.29388833828873473, + "grad_norm": 2.784437417984009, + "learning_rate": 8.905855440402224e-07, + "logits/chosen": -0.405662477016449, + "logits/rejected": -0.35549795627593994, + "logps/chosen": -0.7482771277427673, + "logps/rejected": -0.795568585395813, + "loss": 1.3656, + "rewards/accuracies": 0.578125, + "rewards/chosen": -1.4965542554855347, + "rewards/margins": 0.09458285570144653, + "rewards/rejected": -1.591137170791626, + "step": 139 + }, + { + "epoch": 0.29600264288074, + "grad_norm": 0.4958692193031311, + "learning_rate": 8.882619313590212e-07, + "logits/chosen": -0.3814452886581421, + "logits/rejected": -0.35715553164482117, + "logps/chosen": -0.7731542587280273, + "logps/rejected": -0.8285202980041504, + "loss": 1.3776, + "rewards/accuracies": 0.515625, + "rewards/chosen": -1.5463085174560547, + "rewards/margins": 0.11073210835456848, + "rewards/rejected": -1.6570405960083008, + "step": 140 + }, + { + "epoch": 0.2981169474727453, + "grad_norm": 0.4597362279891968, + "learning_rate": 8.859170034069532e-07, + "logits/chosen": -0.388383150100708, + "logits/rejected": -0.4071737229824066, + "logps/chosen": -0.7263504266738892, + "logps/rejected": -0.769676148891449, + "loss": 1.3712, + "rewards/accuracies": 0.53125, + "rewards/chosen": -1.4527008533477783, + "rewards/margins": 0.08665145933628082, + "rewards/rejected": -1.539352297782898, + "step": 141 + }, + { + "epoch": 0.3002312520647506, + "grad_norm": 0.4914930760860443, + "learning_rate": 8.835508889186956e-07, + "logits/chosen": -0.41084378957748413, + "logits/rejected": -0.3823031187057495, + "logps/chosen": -0.7565821409225464, + "logps/rejected": -0.9084322452545166, + "loss": 1.2717, + "rewards/accuracies": 0.578125, + "rewards/chosen": -1.5131642818450928, + "rewards/margins": 0.3037002384662628, + "rewards/rejected": -1.8168644905090332, + "step": 142 + }, + { + "epoch": 0.3023455566567559, + "grad_norm": 2.0075581073760986, + "learning_rate": 8.811637177920499e-07, + "logits/chosen": -0.4438302516937256, + "logits/rejected": -0.4916025698184967, + "logps/chosen": -0.800719141960144, + "logps/rejected": -0.8658267855644226, + "loss": 1.358, + "rewards/accuracies": 0.515625, + "rewards/chosen": -1.601438283920288, + "rewards/margins": 0.1302153617143631, + "rewards/rejected": -1.7316535711288452, + "step": 143 + }, + { + "epoch": 0.3044598612487612, + "grad_norm": 1.1243022680282593, + "learning_rate": 8.7875562108081e-07, + "logits/chosen": -0.40519949793815613, + "logits/rejected": -0.3905750811100006, + "logps/chosen": -0.689585268497467, + "logps/rejected": -0.7312421798706055, + "loss": 1.3503, + "rewards/accuracies": 0.5703125, + "rewards/chosen": -1.379170536994934, + "rewards/margins": 0.08331384509801865, + "rewards/rejected": -1.462484359741211, + "step": 144 + }, + { + "epoch": 0.3065741658407664, + "grad_norm": 0.7543137669563293, + "learning_rate": 8.76326730987568e-07, + "logits/chosen": -0.4696752727031708, + "logits/rejected": -0.4357326626777649, + "logps/chosen": -0.7813425660133362, + "logps/rejected": -0.8276973962783813, + "loss": 1.3794, + "rewards/accuracies": 0.5234375, + "rewards/chosen": -1.5626851320266724, + "rewards/margins": 0.09270970523357391, + "rewards/rejected": -1.6553947925567627, + "step": 145 + }, + { + "epoch": 0.3086884704327717, + "grad_norm": 1.3136053085327148, + "learning_rate": 8.738771808564555e-07, + "logits/chosen": -0.4262731075286865, + "logits/rejected": -0.44038820266723633, + "logps/chosen": -0.697494387626648, + "logps/rejected": -0.8369535803794861, + "loss": 1.2699, + "rewards/accuracies": 0.609375, + "rewards/chosen": -1.394988775253296, + "rewards/margins": 0.2789183557033539, + "rewards/rejected": -1.6739071607589722, + "step": 146 + }, + { + "epoch": 0.310802775024777, + "grad_norm": 2.221562385559082, + "learning_rate": 8.714071051658245e-07, + "logits/chosen": -0.40089336037635803, + "logits/rejected": -0.37991875410079956, + "logps/chosen": -0.7704445123672485, + "logps/rejected": -0.859091579914093, + "loss": 1.2987, + "rewards/accuracies": 0.5859375, + "rewards/chosen": -1.540889024734497, + "rewards/margins": 0.17729414999485016, + "rewards/rejected": -1.718183159828186, + "step": 147 + }, + { + "epoch": 0.3129170796167823, + "grad_norm": 1.5049912929534912, + "learning_rate": 8.689166395208636e-07, + "logits/chosen": -0.38984015583992004, + "logits/rejected": -0.35900723934173584, + "logps/chosen": -0.6424779891967773, + "logps/rejected": -0.7145389318466187, + "loss": 1.3261, + "rewards/accuracies": 0.5703125, + "rewards/chosen": -1.2849559783935547, + "rewards/margins": 0.14412200450897217, + "rewards/rejected": -1.4290778636932373, + "step": 148 + }, + { + "epoch": 0.31503138420878757, + "grad_norm": 0.36125388741493225, + "learning_rate": 8.664059206461534e-07, + "logits/chosen": -0.3490441143512726, + "logits/rejected": -0.3219914436340332, + "logps/chosen": -0.7200264930725098, + "logps/rejected": -0.7924249768257141, + "loss": 1.3476, + "rewards/accuracies": 0.5625, + "rewards/chosen": -1.4400529861450195, + "rewards/margins": 0.1447969526052475, + "rewards/rejected": -1.5848499536514282, + "step": 149 + }, + { + "epoch": 0.31714568880079286, + "grad_norm": 1.039840579032898, + "learning_rate": 8.638750863781612e-07, + "logits/chosen": -0.40701645612716675, + "logits/rejected": -0.406186580657959, + "logps/chosen": -0.7083575129508972, + "logps/rejected": -0.7766748070716858, + "loss": 1.3263, + "rewards/accuracies": 0.5390625, + "rewards/chosen": -1.4167150259017944, + "rewards/margins": 0.1366347074508667, + "rewards/rejected": -1.5533496141433716, + "step": 150 + }, + { + "epoch": 0.31925999339279815, + "grad_norm": 0.7128564119338989, + "learning_rate": 8.613242756576728e-07, + "logits/chosen": -0.40932926535606384, + "logits/rejected": -0.4234562814235687, + "logps/chosen": -0.6775843501091003, + "logps/rejected": -0.7866222858428955, + "loss": 1.2834, + "rewards/accuracies": 0.6015625, + "rewards/chosen": -1.3551687002182007, + "rewards/margins": 0.2180757373571396, + "rewards/rejected": -1.573244571685791, + "step": 151 + }, + { + "epoch": 0.32137429798480344, + "grad_norm": 1.1701059341430664, + "learning_rate": 8.587536285221655e-07, + "logits/chosen": -0.3654797077178955, + "logits/rejected": -0.3181680738925934, + "logps/chosen": -0.6686022877693176, + "logps/rejected": -0.7058504223823547, + "loss": 1.3612, + "rewards/accuracies": 0.546875, + "rewards/chosen": -1.3372045755386353, + "rewards/margins": 0.07449636608362198, + "rewards/rejected": -1.4117008447647095, + "step": 152 + }, + { + "epoch": 0.3234886025768087, + "grad_norm": 0.8239700794219971, + "learning_rate": 8.561632860981204e-07, + "logits/chosen": -0.42527130246162415, + "logits/rejected": -0.4091627299785614, + "logps/chosen": -0.6969794631004333, + "logps/rejected": -0.8019355535507202, + "loss": 1.2974, + "rewards/accuracies": 0.578125, + "rewards/chosen": -1.3939589262008667, + "rewards/margins": 0.20991206169128418, + "rewards/rejected": -1.6038711071014404, + "step": 153 + }, + { + "epoch": 0.325602907168814, + "grad_norm": 1.4885636568069458, + "learning_rate": 8.535533905932737e-07, + "logits/chosen": -0.4126192331314087, + "logits/rejected": -0.41548141837120056, + "logps/chosen": -0.7076549530029297, + "logps/rejected": -0.7940821051597595, + "loss": 1.3198, + "rewards/accuracies": 0.625, + "rewards/chosen": -1.4153099060058594, + "rewards/margins": 0.17285437881946564, + "rewards/rejected": -1.588164210319519, + "step": 154 + }, + { + "epoch": 0.3277172117608193, + "grad_norm": 1.439434289932251, + "learning_rate": 8.509240852888106e-07, + "logits/chosen": -0.3763914704322815, + "logits/rejected": -0.3617165684700012, + "logps/chosen": -0.7189474105834961, + "logps/rejected": -0.827629804611206, + "loss": 1.2816, + "rewards/accuracies": 0.625, + "rewards/chosen": -1.4378948211669922, + "rewards/margins": 0.2173648476600647, + "rewards/rejected": -1.655259609222412, + "step": 155 + }, + { + "epoch": 0.3298315163528246, + "grad_norm": 1.4505418539047241, + "learning_rate": 8.482755145314985e-07, + "logits/chosen": -0.37879478931427, + "logits/rejected": -0.38689684867858887, + "logps/chosen": -0.7011865973472595, + "logps/rejected": -0.8019431829452515, + "loss": 1.3158, + "rewards/accuracies": 0.5546875, + "rewards/chosen": -1.402373194694519, + "rewards/margins": 0.2015131413936615, + "rewards/rejected": -1.603886365890503, + "step": 156 + }, + { + "epoch": 0.3319458209448299, + "grad_norm": 2.0968713760375977, + "learning_rate": 8.45607823725763e-07, + "logits/chosen": -0.4366365075111389, + "logits/rejected": -0.41210681200027466, + "logps/chosen": -0.6455651521682739, + "logps/rejected": -0.7228428721427917, + "loss": 1.3247, + "rewards/accuracies": 0.5546875, + "rewards/chosen": -1.2911303043365479, + "rewards/margins": 0.1545555144548416, + "rewards/rejected": -1.4456857442855835, + "step": 157 + }, + { + "epoch": 0.3340601255368352, + "grad_norm": 0.6716106534004211, + "learning_rate": 8.429211593257052e-07, + "logits/chosen": -0.42992207407951355, + "logits/rejected": -0.4105672836303711, + "logps/chosen": -0.6981461048126221, + "logps/rejected": -0.7909567952156067, + "loss": 1.3128, + "rewards/accuracies": 0.5859375, + "rewards/chosen": -1.3962922096252441, + "rewards/margins": 0.1856214702129364, + "rewards/rejected": -1.5819135904312134, + "step": 158 + }, + { + "epoch": 0.33617443012884046, + "grad_norm": 2.4430501461029053, + "learning_rate": 8.402156688270612e-07, + "logits/chosen": -0.4184916317462921, + "logits/rejected": -0.3943992257118225, + "logps/chosen": -0.6568948030471802, + "logps/rejected": -0.7506390810012817, + "loss": 1.2992, + "rewards/accuracies": 0.5859375, + "rewards/chosen": -1.3137896060943604, + "rewards/margins": 0.18748846650123596, + "rewards/rejected": -1.5012781620025635, + "step": 159 + }, + { + "epoch": 0.3382887347208457, + "grad_norm": 2.0322091579437256, + "learning_rate": 8.374915007591052e-07, + "logits/chosen": -0.4713057577610016, + "logits/rejected": -0.42163771390914917, + "logps/chosen": -0.7347853779792786, + "logps/rejected": -0.7770044207572937, + "loss": 1.3801, + "rewards/accuracies": 0.5078125, + "rewards/chosen": -1.4695707559585571, + "rewards/margins": 0.0844380110502243, + "rewards/rejected": -1.5540088415145874, + "step": 160 + }, + { + "epoch": 0.340403039312851, + "grad_norm": 0.4045500159263611, + "learning_rate": 8.347488046764948e-07, + "logits/chosen": -0.39465126395225525, + "logits/rejected": -0.3961923122406006, + "logps/chosen": -0.601732075214386, + "logps/rejected": -0.694148600101471, + "loss": 1.2859, + "rewards/accuracies": 0.6328125, + "rewards/chosen": -1.203464150428772, + "rewards/margins": 0.18483319878578186, + "rewards/rejected": -1.388297200202942, + "step": 161 + }, + { + "epoch": 0.3425173439048563, + "grad_norm": 2.79396915435791, + "learning_rate": 8.319877311510612e-07, + "logits/chosen": -0.4311378002166748, + "logits/rejected": -0.4248836636543274, + "logps/chosen": -0.6813413500785828, + "logps/rejected": -0.775830864906311, + "loss": 1.3001, + "rewards/accuracies": 0.578125, + "rewards/chosen": -1.3626827001571655, + "rewards/margins": 0.18897925317287445, + "rewards/rejected": -1.551661729812622, + "step": 162 + }, + { + "epoch": 0.34463164849686156, + "grad_norm": 0.714146077632904, + "learning_rate": 8.292084317635419e-07, + "logits/chosen": -0.4060715436935425, + "logits/rejected": -0.3770482540130615, + "logps/chosen": -0.7176523208618164, + "logps/rejected": -0.7973593473434448, + "loss": 1.324, + "rewards/accuracies": 0.5859375, + "rewards/chosen": -1.4353046417236328, + "rewards/margins": 0.15941408276557922, + "rewards/rejected": -1.5947186946868896, + "step": 163 + }, + { + "epoch": 0.34674595308886685, + "grad_norm": 1.6007037162780762, + "learning_rate": 8.264110590952607e-07, + "logits/chosen": -0.49063974618911743, + "logits/rejected": -0.5119628310203552, + "logps/chosen": -0.7263911366462708, + "logps/rejected": -0.9138184785842896, + "loss": 1.2439, + "rewards/accuracies": 0.6484375, + "rewards/chosen": -1.4527822732925415, + "rewards/margins": 0.3748546540737152, + "rewards/rejected": -1.827636957168579, + "step": 164 + }, + { + "epoch": 0.34886025768087214, + "grad_norm": 1.4566830396652222, + "learning_rate": 8.235957667197494e-07, + "logits/chosen": -0.4681779146194458, + "logits/rejected": -0.46475380659103394, + "logps/chosen": -0.6923782229423523, + "logps/rejected": -0.7901281118392944, + "loss": 1.295, + "rewards/accuracies": 0.5703125, + "rewards/chosen": -1.3847564458847046, + "rewards/margins": 0.19549959897994995, + "rewards/rejected": -1.5802562236785889, + "step": 165 + }, + { + "epoch": 0.35097456227287743, + "grad_norm": 3.0825328826904297, + "learning_rate": 8.207627091943177e-07, + "logits/chosen": -0.4294862151145935, + "logits/rejected": -0.42411237955093384, + "logps/chosen": -0.6851246356964111, + "logps/rejected": -0.7844961881637573, + "loss": 1.2871, + "rewards/accuracies": 0.625, + "rewards/chosen": -1.3702492713928223, + "rewards/margins": 0.19874317944049835, + "rewards/rejected": -1.5689923763275146, + "step": 166 + }, + { + "epoch": 0.3530888668648827, + "grad_norm": 1.0783339738845825, + "learning_rate": 8.179120420515675e-07, + "logits/chosen": -0.4528030455112457, + "logits/rejected": -0.4626815617084503, + "logps/chosen": -0.703376293182373, + "logps/rejected": -0.8752757906913757, + "loss": 1.2193, + "rewards/accuracies": 0.703125, + "rewards/chosen": -1.406752586364746, + "rewards/margins": 0.34379899501800537, + "rewards/rejected": -1.7505515813827515, + "step": 167 + }, + { + "epoch": 0.355203171456888, + "grad_norm": 2.6788036823272705, + "learning_rate": 8.150439217908556e-07, + "logits/chosen": -0.44946759939193726, + "logits/rejected": -0.47430264949798584, + "logps/chosen": -0.751136839389801, + "logps/rejected": -0.874577522277832, + "loss": 1.29, + "rewards/accuracies": 0.578125, + "rewards/chosen": -1.502273678779602, + "rewards/margins": 0.24688144028186798, + "rewards/rejected": -1.749155044555664, + "step": 168 + }, + { + "epoch": 0.3573174760488933, + "grad_norm": 0.9087730050086975, + "learning_rate": 8.121585058696999e-07, + "logits/chosen": -0.47294262051582336, + "logits/rejected": -0.46765226125717163, + "logps/chosen": -0.7291173934936523, + "logps/rejected": -0.7999277114868164, + "loss": 1.3482, + "rewards/accuracies": 0.6015625, + "rewards/chosen": -1.4582347869873047, + "rewards/margins": 0.1416206806898117, + "rewards/rejected": -1.5998554229736328, + "step": 169 + }, + { + "epoch": 0.3594317806408986, + "grad_norm": 3.392674207687378, + "learning_rate": 8.092559526951374e-07, + "logits/chosen": -0.5026620626449585, + "logits/rejected": -0.46620574593544006, + "logps/chosen": -0.746992290019989, + "logps/rejected": -0.8266301155090332, + "loss": 1.3202, + "rewards/accuracies": 0.6328125, + "rewards/chosen": -1.493984580039978, + "rewards/margins": 0.15927578508853912, + "rewards/rejected": -1.6532602310180664, + "step": 170 + }, + { + "epoch": 0.3615460852329039, + "grad_norm": 1.27628755569458, + "learning_rate": 8.063364216150256e-07, + "logits/chosen": -0.5211395025253296, + "logits/rejected": -0.5419963598251343, + "logps/chosen": -0.7919114828109741, + "logps/rejected": -0.8731362223625183, + "loss": 1.3228, + "rewards/accuracies": 0.5703125, + "rewards/chosen": -1.5838229656219482, + "rewards/margins": 0.16244953870773315, + "rewards/rejected": -1.7462724447250366, + "step": 171 + }, + { + "epoch": 0.36366038982490917, + "grad_norm": 0.8269656896591187, + "learning_rate": 8.034000729092967e-07, + "logits/chosen": -0.49545183777809143, + "logits/rejected": -0.4716613292694092, + "logps/chosen": -0.719520092010498, + "logps/rejected": -0.7876347303390503, + "loss": 1.3367, + "rewards/accuracies": 0.5625, + "rewards/chosen": -1.439040184020996, + "rewards/margins": 0.13622930645942688, + "rewards/rejected": -1.5752694606781006, + "step": 172 + }, + { + "epoch": 0.36577469441691446, + "grad_norm": 0.6049383282661438, + "learning_rate": 8.004470677811559e-07, + "logits/chosen": -0.45276379585266113, + "logits/rejected": -0.42617955803871155, + "logps/chosen": -0.7097947597503662, + "logps/rejected": -0.7606989145278931, + "loss": 1.3909, + "rewards/accuracies": 0.5703125, + "rewards/chosen": -1.4195895195007324, + "rewards/margins": 0.10180822014808655, + "rewards/rejected": -1.5213978290557861, + "step": 173 + }, + { + "epoch": 0.36788899900891975, + "grad_norm": 3.980013847351074, + "learning_rate": 7.974775683482337e-07, + "logits/chosen": -0.4783569574356079, + "logits/rejected": -0.43521156907081604, + "logps/chosen": -0.7623491287231445, + "logps/rejected": -0.8719285130500793, + "loss": 1.2838, + "rewards/accuracies": 0.5859375, + "rewards/chosen": -1.524698257446289, + "rewards/margins": 0.2191585898399353, + "rewards/rejected": -1.7438570261001587, + "step": 174 + }, + { + "epoch": 0.370003303600925, + "grad_norm": 1.024530053138733, + "learning_rate": 7.94491737633684e-07, + "logits/chosen": -0.5009916424751282, + "logits/rejected": -0.48874592781066895, + "logps/chosen": -0.7552992701530457, + "logps/rejected": -0.8485872745513916, + "loss": 1.3153, + "rewards/accuracies": 0.6015625, + "rewards/chosen": -1.5105985403060913, + "rewards/margins": 0.18657605350017548, + "rewards/rejected": -1.6971745491027832, + "step": 175 + }, + { + "epoch": 0.37211760819293027, + "grad_norm": 1.5952919721603394, + "learning_rate": 7.91489739557236e-07, + "logits/chosen": -0.4424138069152832, + "logits/rejected": -0.4334307312965393, + "logps/chosen": -0.6956002116203308, + "logps/rejected": -0.8018803000450134, + "loss": 1.3011, + "rewards/accuracies": 0.59375, + "rewards/chosen": -1.3912004232406616, + "rewards/margins": 0.21256020665168762, + "rewards/rejected": -1.6037606000900269, + "step": 176 + }, + { + "epoch": 0.37423191278493556, + "grad_norm": 1.8331164121627808, + "learning_rate": 7.884717389261934e-07, + "logits/chosen": -0.4836267828941345, + "logits/rejected": -0.5018677115440369, + "logps/chosen": -0.7895969152450562, + "logps/rejected": -0.927432656288147, + "loss": 1.2467, + "rewards/accuracies": 0.625, + "rewards/chosen": -1.5791938304901123, + "rewards/margins": 0.27567166090011597, + "rewards/rejected": -1.854865312576294, + "step": 177 + }, + { + "epoch": 0.37634621737694085, + "grad_norm": 2.165984869003296, + "learning_rate": 7.854379014263876e-07, + "logits/chosen": -0.46125832200050354, + "logits/rejected": -0.39802712202072144, + "logps/chosen": -0.8382925391197205, + "logps/rejected": -0.9422982931137085, + "loss": 1.339, + "rewards/accuracies": 0.5546875, + "rewards/chosen": -1.676585078239441, + "rewards/margins": 0.20801125466823578, + "rewards/rejected": -1.884596586227417, + "step": 178 + }, + { + "epoch": 0.37846052196894614, + "grad_norm": 0.522197425365448, + "learning_rate": 7.823883936130817e-07, + "logits/chosen": -0.4747823476791382, + "logits/rejected": -0.4888593554496765, + "logps/chosen": -0.723059892654419, + "logps/rejected": -0.84626305103302, + "loss": 1.2708, + "rewards/accuracies": 0.6171875, + "rewards/chosen": -1.446119785308838, + "rewards/margins": 0.24640652537345886, + "rewards/rejected": -1.69252610206604, + "step": 179 + }, + { + "epoch": 0.38057482656095143, + "grad_norm": 1.9690748453140259, + "learning_rate": 7.793233829018262e-07, + "logits/chosen": -0.5430271625518799, + "logits/rejected": -0.5403288006782532, + "logps/chosen": -0.8244275450706482, + "logps/rejected": -0.9133931994438171, + "loss": 1.3306, + "rewards/accuracies": 0.5, + "rewards/chosen": -1.6488550901412964, + "rewards/margins": 0.17793115973472595, + "rewards/rejected": -1.8267863988876343, + "step": 180 + }, + { + "epoch": 0.3826891311529567, + "grad_norm": 2.9181363582611084, + "learning_rate": 7.762430375592688e-07, + "logits/chosen": -0.4843495786190033, + "logits/rejected": -0.47929176688194275, + "logps/chosen": -0.8097372055053711, + "logps/rejected": -0.8973760008811951, + "loss": 1.3283, + "rewards/accuracies": 0.5625, + "rewards/chosen": -1.6194744110107422, + "rewards/margins": 0.17527759075164795, + "rewards/rejected": -1.7947520017623901, + "step": 181 + }, + { + "epoch": 0.384803435744962, + "grad_norm": 4.227083683013916, + "learning_rate": 7.731475266939158e-07, + "logits/chosen": -0.5047686696052551, + "logits/rejected": -0.4921850264072418, + "logps/chosen": -0.875984787940979, + "logps/rejected": -1.0406755208969116, + "loss": 1.3169, + "rewards/accuracies": 0.5703125, + "rewards/chosen": -1.751969575881958, + "rewards/margins": 0.32938146591186523, + "rewards/rejected": -2.0813510417938232, + "step": 182 + }, + { + "epoch": 0.3869177403369673, + "grad_norm": 1.2871490716934204, + "learning_rate": 7.700370202468489e-07, + "logits/chosen": -0.5123783349990845, + "logits/rejected": -0.55179762840271, + "logps/chosen": -0.8869211077690125, + "logps/rejected": -1.1082773208618164, + "loss": 1.216, + "rewards/accuracies": 0.640625, + "rewards/chosen": -1.773842215538025, + "rewards/margins": 0.4427123963832855, + "rewards/rejected": -2.216554641723633, + "step": 183 + }, + { + "epoch": 0.3890320449289726, + "grad_norm": 1.3015679121017456, + "learning_rate": 7.669116889823954e-07, + "logits/chosen": -0.49182361364364624, + "logits/rejected": -0.5180585384368896, + "logps/chosen": -0.8816227912902832, + "logps/rejected": -0.9516821503639221, + "loss": 1.3449, + "rewards/accuracies": 0.5078125, + "rewards/chosen": -1.7632455825805664, + "rewards/margins": 0.14011862874031067, + "rewards/rejected": -1.9033643007278442, + "step": 184 + }, + { + "epoch": 0.3911463495209779, + "grad_norm": 4.280956268310547, + "learning_rate": 7.637717044787526e-07, + "logits/chosen": -0.5702117681503296, + "logits/rejected": -0.5475804209709167, + "logps/chosen": -0.9307697415351868, + "logps/rejected": -1.0322346687316895, + "loss": 1.3434, + "rewards/accuracies": 0.5546875, + "rewards/chosen": -1.8615394830703735, + "rewards/margins": 0.20292985439300537, + "rewards/rejected": -2.064469337463379, + "step": 185 + }, + { + "epoch": 0.39326065411298317, + "grad_norm": 1.3511455059051514, + "learning_rate": 7.606172391185699e-07, + "logits/chosen": -0.5466108322143555, + "logits/rejected": -0.551085352897644, + "logps/chosen": -1.0657893419265747, + "logps/rejected": -1.15786612033844, + "loss": 1.3549, + "rewards/accuracies": 0.4609375, + "rewards/chosen": -2.1315786838531494, + "rewards/margins": 0.18415334820747375, + "rewards/rejected": -2.31573224067688, + "step": 186 + }, + { + "epoch": 0.39537495870498846, + "grad_norm": 0.7001176476478577, + "learning_rate": 7.574484660794836e-07, + "logits/chosen": -0.4849010407924652, + "logits/rejected": -0.5057946443557739, + "logps/chosen": -1.0784757137298584, + "logps/rejected": -1.2035218477249146, + "loss": 1.3556, + "rewards/accuracies": 0.4921875, + "rewards/chosen": -2.156951427459717, + "rewards/margins": 0.25009211897850037, + "rewards/rejected": -2.407043695449829, + "step": 187 + }, + { + "epoch": 0.39748926329699374, + "grad_norm": 3.1405649185180664, + "learning_rate": 7.542655593246103e-07, + "logits/chosen": -0.5316596031188965, + "logits/rejected": -0.5658366680145264, + "logps/chosen": -1.0630009174346924, + "logps/rejected": -1.2867177724838257, + "loss": 1.2612, + "rewards/accuracies": 0.5703125, + "rewards/chosen": -2.1260018348693848, + "rewards/margins": 0.447433739900589, + "rewards/rejected": -2.5734355449676514, + "step": 188 + }, + { + "epoch": 0.39960356788899903, + "grad_norm": 2.142986297607422, + "learning_rate": 7.510686935929962e-07, + "logits/chosen": -0.5959028005599976, + "logits/rejected": -0.5836039781570435, + "logps/chosen": -1.111003041267395, + "logps/rejected": -1.1858208179473877, + "loss": 1.3958, + "rewards/accuracies": 0.5390625, + "rewards/chosen": -2.22200608253479, + "rewards/margins": 0.149635449051857, + "rewards/rejected": -2.3716416358947754, + "step": 189 + }, + { + "epoch": 0.40171787248100427, + "grad_norm": 1.9227335453033447, + "learning_rate": 7.478580443900246e-07, + "logits/chosen": -0.607532799243927, + "logits/rejected": -0.6102017760276794, + "logps/chosen": -1.3353261947631836, + "logps/rejected": -1.3975369930267334, + "loss": 1.457, + "rewards/accuracies": 0.5078125, + "rewards/chosen": -2.670652389526367, + "rewards/margins": 0.12442154437303543, + "rewards/rejected": -2.795073986053467, + "step": 190 + }, + { + "epoch": 0.40383217707300956, + "grad_norm": 0.8509105443954468, + "learning_rate": 7.446337879777802e-07, + "logits/chosen": -0.5903070569038391, + "logits/rejected": -0.5728173851966858, + "logps/chosen": -1.27094566822052, + "logps/rejected": -1.3024815320968628, + "loss": 1.4953, + "rewards/accuracies": 0.5, + "rewards/chosen": -2.54189133644104, + "rewards/margins": 0.06307169049978256, + "rewards/rejected": -2.6049630641937256, + "step": 191 + }, + { + "epoch": 0.40594648166501485, + "grad_norm": 1.1561088562011719, + "learning_rate": 7.413961013653725e-07, + "logits/chosen": -0.5578102469444275, + "logits/rejected": -0.5907329320907593, + "logps/chosen": -1.3817013502120972, + "logps/rejected": -1.419295072555542, + "loss": 1.4865, + "rewards/accuracies": 0.515625, + "rewards/chosen": -2.7634027004241943, + "rewards/margins": 0.07518734782934189, + "rewards/rejected": -2.838590145111084, + "step": 192 + }, + { + "epoch": 0.40806078625702014, + "grad_norm": 8.165387153625488, + "learning_rate": 7.381451622992183e-07, + "logits/chosen": -0.5213198661804199, + "logits/rejected": -0.5392848253250122, + "logps/chosen": -1.1798306703567505, + "logps/rejected": -1.2692899703979492, + "loss": 1.3971, + "rewards/accuracies": 0.53125, + "rewards/chosen": -2.359661340713501, + "rewards/margins": 0.17891867458820343, + "rewards/rejected": -2.5385799407958984, + "step": 193 + }, + { + "epoch": 0.4101750908490254, + "grad_norm": 1.2850884199142456, + "learning_rate": 7.348811492532839e-07, + "logits/chosen": -0.5382787585258484, + "logits/rejected": -0.5274642705917358, + "logps/chosen": -1.242587685585022, + "logps/rejected": -1.272438645362854, + "loss": 1.4795, + "rewards/accuracies": 0.4921875, + "rewards/chosen": -2.485175371170044, + "rewards/margins": 0.05970197170972824, + "rewards/rejected": -2.544877290725708, + "step": 194 + }, + { + "epoch": 0.4122893954410307, + "grad_norm": 4.910929203033447, + "learning_rate": 7.316042414192864e-07, + "logits/chosen": -0.6186666488647461, + "logits/rejected": -0.6255884170532227, + "logps/chosen": -1.1743704080581665, + "logps/rejected": -1.2720146179199219, + "loss": 1.4127, + "rewards/accuracies": 0.546875, + "rewards/chosen": -2.348740816116333, + "rewards/margins": 0.19528816640377045, + "rewards/rejected": -2.5440292358398438, + "step": 195 + }, + { + "epoch": 0.414403700033036, + "grad_norm": 4.270901203155518, + "learning_rate": 7.283146186968565e-07, + "logits/chosen": -0.5861366987228394, + "logits/rejected": -0.6005197763442993, + "logps/chosen": -1.2127022743225098, + "logps/rejected": -1.3036490678787231, + "loss": 1.4067, + "rewards/accuracies": 0.546875, + "rewards/chosen": -2.4254045486450195, + "rewards/margins": 0.18189355731010437, + "rewards/rejected": -2.6072981357574463, + "step": 196 + }, + { + "epoch": 0.4165180046250413, + "grad_norm": 0.3070116639137268, + "learning_rate": 7.250124616836622e-07, + "logits/chosen": -0.6026022434234619, + "logits/rejected": -0.5920048952102661, + "logps/chosen": -1.0706496238708496, + "logps/rejected": -1.2879594564437866, + "loss": 1.2465, + "rewards/accuracies": 0.6484375, + "rewards/chosen": -2.141299247741699, + "rewards/margins": 0.4346192479133606, + "rewards/rejected": -2.5759189128875732, + "step": 197 + }, + { + "epoch": 0.4186323092170466, + "grad_norm": 1.160252571105957, + "learning_rate": 7.216979516654943e-07, + "logits/chosen": -0.5808722376823425, + "logits/rejected": -0.5770124197006226, + "logps/chosen": -1.0426011085510254, + "logps/rejected": -1.1295092105865479, + "loss": 1.4244, + "rewards/accuracies": 0.53125, + "rewards/chosen": -2.085202217102051, + "rewards/margins": 0.1738162338733673, + "rewards/rejected": -2.2590184211730957, + "step": 198 + }, + { + "epoch": 0.4207466138090519, + "grad_norm": 4.6966471672058105, + "learning_rate": 7.183712706063132e-07, + "logits/chosen": -0.5958350896835327, + "logits/rejected": -0.6440161466598511, + "logps/chosen": -0.981076717376709, + "logps/rejected": -1.1257147789001465, + "loss": 1.3175, + "rewards/accuracies": 0.6015625, + "rewards/chosen": -1.962153434753418, + "rewards/margins": 0.28927627205848694, + "rewards/rejected": -2.251429557800293, + "step": 199 + }, + { + "epoch": 0.42286091840105716, + "grad_norm": 2.9395248889923096, + "learning_rate": 7.150326011382603e-07, + "logits/chosen": -0.5647889375686646, + "logits/rejected": -0.5762943625450134, + "logps/chosen": -0.8101261854171753, + "logps/rejected": -1.0001438856124878, + "loss": 1.2135, + "rewards/accuracies": 0.6875, + "rewards/chosen": -1.6202523708343506, + "rewards/margins": 0.38003528118133545, + "rewards/rejected": -2.0002877712249756, + "step": 200 + }, + { + "epoch": 0.42497522299306245, + "grad_norm": 1.2575147151947021, + "learning_rate": 7.116821265516306e-07, + "logits/chosen": -0.5834293961524963, + "logits/rejected": -0.5929508805274963, + "logps/chosen": -0.8768399953842163, + "logps/rejected": -1.0942046642303467, + "loss": 1.219, + "rewards/accuracies": 0.6171875, + "rewards/chosen": -1.7536799907684326, + "rewards/margins": 0.43472927808761597, + "rewards/rejected": -2.1884093284606934, + "step": 201 + }, + { + "epoch": 0.42708952758506774, + "grad_norm": 1.4035751819610596, + "learning_rate": 7.083200307848115e-07, + "logits/chosen": -0.5424078106880188, + "logits/rejected": -0.5316082239151001, + "logps/chosen": -0.8791903257369995, + "logps/rejected": -0.9323580265045166, + "loss": 1.3675, + "rewards/accuracies": 0.546875, + "rewards/chosen": -1.758380651473999, + "rewards/margins": 0.10633517056703568, + "rewards/rejected": -1.8647160530090332, + "step": 202 + }, + { + "epoch": 0.42920383217707303, + "grad_norm": 1.8622503280639648, + "learning_rate": 7.049464984141829e-07, + "logits/chosen": -0.5329294204711914, + "logits/rejected": -0.5523126721382141, + "logps/chosen": -0.695776104927063, + "logps/rejected": -0.8400713801383972, + "loss": 1.2285, + "rewards/accuracies": 0.6328125, + "rewards/chosen": -1.391552209854126, + "rewards/margins": 0.28859058022499084, + "rewards/rejected": -1.6801427602767944, + "step": 203 + }, + { + "epoch": 0.4313181367690783, + "grad_norm": 0.8603182435035706, + "learning_rate": 7.015617146439861e-07, + "logits/chosen": -0.4516752064228058, + "logits/rejected": -0.46907976269721985, + "logps/chosen": -0.6868133544921875, + "logps/rejected": -0.8646677732467651, + "loss": 1.2417, + "rewards/accuracies": 0.65625, + "rewards/chosen": -1.373626708984375, + "rewards/margins": 0.355709046125412, + "rewards/rejected": -1.7293355464935303, + "step": 204 + }, + { + "epoch": 0.43343244136108355, + "grad_norm": 0.6437748670578003, + "learning_rate": 6.981658652961546e-07, + "logits/chosen": -0.6159051656723022, + "logits/rejected": -0.6000130772590637, + "logps/chosen": -0.7715178728103638, + "logps/rejected": -0.8714219331741333, + "loss": 1.3469, + "rewards/accuracies": 0.5703125, + "rewards/chosen": -1.5430357456207275, + "rewards/margins": 0.19980813562870026, + "rewards/rejected": -1.7428438663482666, + "step": 205 + }, + { + "epoch": 0.43554674595308884, + "grad_norm": 1.2309322357177734, + "learning_rate": 6.947591368001137e-07, + "logits/chosen": -0.5913614630699158, + "logits/rejected": -0.6128537654876709, + "logps/chosen": -0.7512561678886414, + "logps/rejected": -0.8872793912887573, + "loss": 1.26, + "rewards/accuracies": 0.578125, + "rewards/chosen": -1.5025123357772827, + "rewards/margins": 0.2720465660095215, + "rewards/rejected": -1.7745587825775146, + "step": 206 + }, + { + "epoch": 0.43766105054509413, + "grad_norm": 0.6153685450553894, + "learning_rate": 6.913417161825449e-07, + "logits/chosen": -0.5976595878601074, + "logits/rejected": -0.6222202181816101, + "logps/chosen": -0.837669849395752, + "logps/rejected": -0.9835771918296814, + "loss": 1.2986, + "rewards/accuracies": 0.5859375, + "rewards/chosen": -1.675339698791504, + "rewards/margins": 0.2918146252632141, + "rewards/rejected": -1.9671543836593628, + "step": 207 + }, + { + "epoch": 0.4397753551370994, + "grad_norm": 1.9922760725021362, + "learning_rate": 6.87913791057119e-07, + "logits/chosen": -0.6808818578720093, + "logits/rejected": -0.6692708730697632, + "logps/chosen": -0.7088961601257324, + "logps/rejected": -0.8256410360336304, + "loss": 1.281, + "rewards/accuracies": 0.6015625, + "rewards/chosen": -1.4177923202514648, + "rewards/margins": 0.23348984122276306, + "rewards/rejected": -1.6512820720672607, + "step": 208 + }, + { + "epoch": 0.4418896597291047, + "grad_norm": 1.9562067985534668, + "learning_rate": 6.844755496141961e-07, + "logits/chosen": -0.5282632112503052, + "logits/rejected": -0.5692226886749268, + "logps/chosen": -0.7235382795333862, + "logps/rejected": -0.801092803478241, + "loss": 1.3227, + "rewards/accuracies": 0.53125, + "rewards/chosen": -1.4470765590667725, + "rewards/margins": 0.1551089584827423, + "rewards/rejected": -1.602185606956482, + "step": 209 + }, + { + "epoch": 0.44400396432111, + "grad_norm": 0.8182584047317505, + "learning_rate": 6.81027180610493e-07, + "logits/chosen": -0.6418904662132263, + "logits/rejected": -0.5941328406333923, + "logps/chosen": -0.820648729801178, + "logps/rejected": -0.8864803910255432, + "loss": 1.3498, + "rewards/accuracies": 0.625, + "rewards/chosen": -1.641297459602356, + "rewards/margins": 0.13166317343711853, + "rewards/rejected": -1.7729607820510864, + "step": 210 + }, + { + "epoch": 0.4461182689131153, + "grad_norm": 3.075260877609253, + "learning_rate": 6.775688733587227e-07, + "logits/chosen": -0.5926809906959534, + "logits/rejected": -0.5844541788101196, + "logps/chosen": -0.7822425365447998, + "logps/rejected": -0.8866626024246216, + "loss": 1.2884, + "rewards/accuracies": 0.5859375, + "rewards/chosen": -1.5644850730895996, + "rewards/margins": 0.20884013175964355, + "rewards/rejected": -1.7733252048492432, + "step": 211 + }, + { + "epoch": 0.4482325735051206, + "grad_norm": 0.8032744526863098, + "learning_rate": 6.741008177171993e-07, + "logits/chosen": -0.579971432685852, + "logits/rejected": -0.5978566408157349, + "logps/chosen": -0.721234917640686, + "logps/rejected": -0.8368514180183411, + "loss": 1.2781, + "rewards/accuracies": 0.6171875, + "rewards/chosen": -1.442469835281372, + "rewards/margins": 0.23123310506343842, + "rewards/rejected": -1.6737028360366821, + "step": 212 + }, + { + "epoch": 0.45034687809712587, + "grad_norm": 0.6680911779403687, + "learning_rate": 6.706232040794161e-07, + "logits/chosen": -0.6748596429824829, + "logits/rejected": -0.6615546941757202, + "logps/chosen": -0.7931480407714844, + "logps/rejected": -0.8879257440567017, + "loss": 1.337, + "rewards/accuracies": 0.546875, + "rewards/chosen": -1.5862960815429688, + "rewards/margins": 0.1895553171634674, + "rewards/rejected": -1.7758514881134033, + "step": 213 + }, + { + "epoch": 0.45246118268913116, + "grad_norm": 2.5107688903808594, + "learning_rate": 6.671362233635925e-07, + "logits/chosen": -0.6460363268852234, + "logits/rejected": -0.6273557543754578, + "logps/chosen": -0.823783814907074, + "logps/rejected": -0.87412428855896, + "loss": 1.3756, + "rewards/accuracies": 0.53125, + "rewards/chosen": -1.647567629814148, + "rewards/margins": 0.10068092495203018, + "rewards/rejected": -1.74824857711792, + "step": 214 + }, + { + "epoch": 0.45457548728113645, + "grad_norm": 2.2206740379333496, + "learning_rate": 6.636400670021933e-07, + "logits/chosen": -0.6295229196548462, + "logits/rejected": -0.6330893039703369, + "logps/chosen": -0.807812511920929, + "logps/rejected": -0.9784457683563232, + "loss": 1.2259, + "rewards/accuracies": 0.6484375, + "rewards/chosen": -1.615625023841858, + "rewards/margins": 0.3412665128707886, + "rewards/rejected": -1.9568915367126465, + "step": 215 + }, + { + "epoch": 0.45668979187314174, + "grad_norm": 1.2925803661346436, + "learning_rate": 6.601349269314187e-07, + "logits/chosen": -0.6001027822494507, + "logits/rejected": -0.6305864453315735, + "logps/chosen": -0.7216315865516663, + "logps/rejected": -0.8616191744804382, + "loss": 1.269, + "rewards/accuracies": 0.65625, + "rewards/chosen": -1.4432631731033325, + "rewards/margins": 0.2799749970436096, + "rewards/rejected": -1.7232383489608765, + "step": 216 + }, + { + "epoch": 0.458804096465147, + "grad_norm": 4.863992214202881, + "learning_rate": 6.566209955806679e-07, + "logits/chosen": -0.5307935476303101, + "logits/rejected": -0.5385264754295349, + "logps/chosen": -0.8053566813468933, + "logps/rejected": -0.9241464734077454, + "loss": 1.3325, + "rewards/accuracies": 0.546875, + "rewards/chosen": -1.6107133626937866, + "rewards/margins": 0.23757943511009216, + "rewards/rejected": -1.8482929468154907, + "step": 217 + }, + { + "epoch": 0.4609184010571523, + "grad_norm": 1.0189604759216309, + "learning_rate": 6.530984658619733e-07, + "logits/chosen": -0.7031885385513306, + "logits/rejected": -0.7072005867958069, + "logps/chosen": -0.8382629752159119, + "logps/rejected": -0.9468755722045898, + "loss": 1.3276, + "rewards/accuracies": 0.5546875, + "rewards/chosen": -1.6765259504318237, + "rewards/margins": 0.21722503006458282, + "rewards/rejected": -1.8937511444091797, + "step": 218 + }, + { + "epoch": 0.4630327056491576, + "grad_norm": 1.1178699731826782, + "learning_rate": 6.495675311594122e-07, + "logits/chosen": -0.5736142992973328, + "logits/rejected": -0.5926069021224976, + "logps/chosen": -0.7676032781600952, + "logps/rejected": -0.9179919958114624, + "loss": 1.278, + "rewards/accuracies": 0.6015625, + "rewards/chosen": -1.5352065563201904, + "rewards/margins": 0.3007773756980896, + "rewards/rejected": -1.8359839916229248, + "step": 219 + }, + { + "epoch": 0.4651470102411629, + "grad_norm": 2.4985287189483643, + "learning_rate": 6.460283853184879e-07, + "logits/chosen": -0.6372602581977844, + "logits/rejected": -0.6313104033470154, + "logps/chosen": -0.8754556179046631, + "logps/rejected": -0.9803894758224487, + "loss": 1.3166, + "rewards/accuracies": 0.6328125, + "rewards/chosen": -1.7509112358093262, + "rewards/margins": 0.2098677009344101, + "rewards/rejected": -1.9607789516448975, + "step": 220 + }, + { + "epoch": 0.46726131483316813, + "grad_norm": 1.5675435066223145, + "learning_rate": 6.424812226354889e-07, + "logits/chosen": -0.6377983093261719, + "logits/rejected": -0.6666730642318726, + "logps/chosen": -0.7556843757629395, + "logps/rejected": -0.9096466302871704, + "loss": 1.2397, + "rewards/accuracies": 0.6796875, + "rewards/chosen": -1.511368751525879, + "rewards/margins": 0.30792441964149475, + "rewards/rejected": -1.8192932605743408, + "step": 221 + }, + { + "epoch": 0.4693756194251734, + "grad_norm": 2.853426218032837, + "learning_rate": 6.389262378468219e-07, + "logits/chosen": -0.6055567860603333, + "logits/rejected": -0.612144947052002, + "logps/chosen": -0.8588352203369141, + "logps/rejected": -0.8928595185279846, + "loss": 1.4022, + "rewards/accuracies": 0.5078125, + "rewards/chosen": -1.7176704406738281, + "rewards/margins": 0.06804870069026947, + "rewards/rejected": -1.7857190370559692, + "step": 222 + }, + { + "epoch": 0.4714899240171787, + "grad_norm": 0.528042733669281, + "learning_rate": 6.353636261183213e-07, + "logits/chosen": -0.6543641090393066, + "logits/rejected": -0.6635830402374268, + "logps/chosen": -0.7858147621154785, + "logps/rejected": -0.9400445222854614, + "loss": 1.2446, + "rewards/accuracies": 0.640625, + "rewards/chosen": -1.571629524230957, + "rewards/margins": 0.3084595203399658, + "rewards/rejected": -1.8800890445709229, + "step": 223 + }, + { + "epoch": 0.473604228609184, + "grad_norm": 1.1155768632888794, + "learning_rate": 6.317935830345338e-07, + "logits/chosen": -0.5700349807739258, + "logits/rejected": -0.6560614705085754, + "logps/chosen": -0.8426170945167542, + "logps/rejected": -0.9983471035957336, + "loss": 1.3204, + "rewards/accuracies": 0.578125, + "rewards/chosen": -1.6852341890335083, + "rewards/margins": 0.3114599883556366, + "rewards/rejected": -1.9966942071914673, + "step": 224 + }, + { + "epoch": 0.4757185332011893, + "grad_norm": 0.802669107913971, + "learning_rate": 6.282163045879823e-07, + "logits/chosen": -0.6912901401519775, + "logits/rejected": -0.7201069593429565, + "logps/chosen": -0.8135342597961426, + "logps/rejected": -0.9537283182144165, + "loss": 1.2961, + "rewards/accuracies": 0.5859375, + "rewards/chosen": -1.6270685195922852, + "rewards/margins": 0.2803882658481598, + "rewards/rejected": -1.907456636428833, + "step": 225 + }, + { + "epoch": 0.4778328377931946, + "grad_norm": 1.709757924079895, + "learning_rate": 6.246319871684047e-07, + "logits/chosen": -0.7573816776275635, + "logits/rejected": -0.8028420209884644, + "logps/chosen": -0.891952633857727, + "logps/rejected": -1.0168029069900513, + "loss": 1.333, + "rewards/accuracies": 0.6015625, + "rewards/chosen": -1.783905267715454, + "rewards/margins": 0.24970072507858276, + "rewards/rejected": -2.0336058139801025, + "step": 226 + }, + { + "epoch": 0.47994714238519987, + "grad_norm": 2.170957326889038, + "learning_rate": 6.210408275519734e-07, + "logits/chosen": -0.6915597915649414, + "logits/rejected": -0.7027997970581055, + "logps/chosen": -0.9063036441802979, + "logps/rejected": -1.0104373693466187, + "loss": 1.3388, + "rewards/accuracies": 0.5390625, + "rewards/chosen": -1.8126072883605957, + "rewards/margins": 0.20826762914657593, + "rewards/rejected": -2.0208747386932373, + "step": 227 + }, + { + "epoch": 0.48206144697720515, + "grad_norm": 1.8802261352539062, + "learning_rate": 6.174430228904919e-07, + "logits/chosen": -0.689726710319519, + "logits/rejected": -0.7143282890319824, + "logps/chosen": -0.7480812072753906, + "logps/rejected": -0.8698041439056396, + "loss": 1.2836, + "rewards/accuracies": 0.6171875, + "rewards/chosen": -1.4961624145507812, + "rewards/margins": 0.24344584345817566, + "rewards/rejected": -1.7396082878112793, + "step": 228 + }, + { + "epoch": 0.48417575156921044, + "grad_norm": 2.5202934741973877, + "learning_rate": 6.13838770700571e-07, + "logits/chosen": -0.6858299374580383, + "logits/rejected": -0.7115206122398376, + "logps/chosen": -0.8575515151023865, + "logps/rejected": -0.9657347202301025, + "loss": 1.3046, + "rewards/accuracies": 0.546875, + "rewards/chosen": -1.715103030204773, + "rewards/margins": 0.21636635065078735, + "rewards/rejected": -1.931469440460205, + "step": 229 + }, + { + "epoch": 0.48629005616121573, + "grad_norm": 1.268512487411499, + "learning_rate": 6.102282688527859e-07, + "logits/chosen": -0.7078689932823181, + "logits/rejected": -0.7254161238670349, + "logps/chosen": -0.8850880861282349, + "logps/rejected": -1.031385898590088, + "loss": 1.2816, + "rewards/accuracies": 0.625, + "rewards/chosen": -1.7701761722564697, + "rewards/margins": 0.29259535670280457, + "rewards/rejected": -2.062771797180176, + "step": 230 + }, + { + "epoch": 0.488404360753221, + "grad_norm": 1.7285584211349487, + "learning_rate": 6.066117155608135e-07, + "logits/chosen": -0.7325868606567383, + "logits/rejected": -0.7433226108551025, + "logps/chosen": -0.8014956116676331, + "logps/rejected": -0.9653260111808777, + "loss": 1.2429, + "rewards/accuracies": 0.6328125, + "rewards/chosen": -1.6029912233352661, + "rewards/margins": 0.32766085863113403, + "rewards/rejected": -1.9306520223617554, + "step": 231 + }, + { + "epoch": 0.4905186653452263, + "grad_norm": 0.6270304322242737, + "learning_rate": 6.029893093705491e-07, + "logits/chosen": -0.692166805267334, + "logits/rejected": -0.6799293756484985, + "logps/chosen": -0.7850213646888733, + "logps/rejected": -0.8839574456214905, + "loss": 1.2967, + "rewards/accuracies": 0.640625, + "rewards/chosen": -1.5700427293777466, + "rewards/margins": 0.19787229597568512, + "rewards/rejected": -1.767914891242981, + "step": 232 + }, + { + "epoch": 0.4926329699372316, + "grad_norm": 1.0160484313964844, + "learning_rate": 5.993612491492087e-07, + "logits/chosen": -0.7095844149589539, + "logits/rejected": -0.71524578332901, + "logps/chosen": -0.7063854336738586, + "logps/rejected": -0.8855549097061157, + "loss": 1.2176, + "rewards/accuracies": 0.6328125, + "rewards/chosen": -1.4127708673477173, + "rewards/margins": 0.3583390712738037, + "rewards/rejected": -1.7711098194122314, + "step": 233 + }, + { + "epoch": 0.4947472745292369, + "grad_norm": 2.225841999053955, + "learning_rate": 5.957277340744094e-07, + "logits/chosen": -0.7488946318626404, + "logits/rejected": -0.7588428854942322, + "logps/chosen": -0.9203822612762451, + "logps/rejected": -1.0089298486709595, + "loss": 1.355, + "rewards/accuracies": 0.5390625, + "rewards/chosen": -1.8407645225524902, + "rewards/margins": 0.17709502577781677, + "rewards/rejected": -2.017859697341919, + "step": 234 + }, + { + "epoch": 0.4968615791212422, + "grad_norm": 1.9577795267105103, + "learning_rate": 5.920889636232351e-07, + "logits/chosen": -0.8078997731208801, + "logits/rejected": -0.8064825534820557, + "logps/chosen": -0.8004480004310608, + "logps/rejected": -0.9856831431388855, + "loss": 1.2273, + "rewards/accuracies": 0.6640625, + "rewards/chosen": -1.6008960008621216, + "rewards/margins": 0.3704703152179718, + "rewards/rejected": -1.971366286277771, + "step": 235 + }, + { + "epoch": 0.4989758837132474, + "grad_norm": 2.5050246715545654, + "learning_rate": 5.884451375612865e-07, + "logits/chosen": -0.7499472498893738, + "logits/rejected": -0.7421904802322388, + "logps/chosen": -0.8363584876060486, + "logps/rejected": -0.9543781876564026, + "loss": 1.3002, + "rewards/accuracies": 0.5390625, + "rewards/chosen": -1.6727169752120972, + "rewards/margins": 0.23603934049606323, + "rewards/rejected": -1.9087563753128052, + "step": 236 + }, + { + "epoch": 0.5010901883052528, + "grad_norm": 0.585436224937439, + "learning_rate": 5.847964559317128e-07, + "logits/chosen": -0.730015218257904, + "logits/rejected": -0.7154791355133057, + "logps/chosen": -0.8828849196434021, + "logps/rejected": -0.9897070527076721, + "loss": 1.347, + "rewards/accuracies": 0.578125, + "rewards/chosen": -1.7657698392868042, + "rewards/margins": 0.21364440023899078, + "rewards/rejected": -1.9794141054153442, + "step": 237 + }, + { + "epoch": 0.503204492897258, + "grad_norm": 0.9204092621803284, + "learning_rate": 5.8114311904423e-07, + "logits/chosen": -0.759974479675293, + "logits/rejected": -0.7793674468994141, + "logps/chosen": -0.8321584463119507, + "logps/rejected": -1.0809751749038696, + "loss": 1.2185, + "rewards/accuracies": 0.6875, + "rewards/chosen": -1.6643168926239014, + "rewards/margins": 0.4976334273815155, + "rewards/rejected": -2.1619503498077393, + "step": 238 + }, + { + "epoch": 0.5053187974892633, + "grad_norm": 5.147011756896973, + "learning_rate": 5.774853274641243e-07, + "logits/chosen": -0.7148956060409546, + "logits/rejected": -0.7363921403884888, + "logps/chosen": -0.8623124361038208, + "logps/rejected": -1.0681498050689697, + "loss": 1.2353, + "rewards/accuracies": 0.6484375, + "rewards/chosen": -1.7246248722076416, + "rewards/margins": 0.4116746187210083, + "rewards/rejected": -2.1362996101379395, + "step": 239 + }, + { + "epoch": 0.5074331020812686, + "grad_norm": 1.9065529108047485, + "learning_rate": 5.738232820012407e-07, + "logits/chosen": -0.7158540487289429, + "logits/rejected": -0.7083900570869446, + "logps/chosen": -0.981558620929718, + "logps/rejected": -1.054612636566162, + "loss": 1.3594, + "rewards/accuracies": 0.59375, + "rewards/chosen": -1.963117241859436, + "rewards/margins": 0.14610806107521057, + "rewards/rejected": -2.109225273132324, + "step": 240 + }, + { + "epoch": 0.5095474066732739, + "grad_norm": 2.4411256313323975, + "learning_rate": 5.701571836989591e-07, + "logits/chosen": -0.8441444039344788, + "logits/rejected": -0.8529233336448669, + "logps/chosen": -0.8665949702262878, + "logps/rejected": -1.030572772026062, + "loss": 1.2477, + "rewards/accuracies": 0.671875, + "rewards/chosen": -1.7331899404525757, + "rewards/margins": 0.3279556334018707, + "rewards/rejected": -2.061145544052124, + "step": 241 + }, + { + "epoch": 0.5116617112652792, + "grad_norm": 2.461113214492798, + "learning_rate": 5.664872338231571e-07, + "logits/chosen": -0.7463312149047852, + "logits/rejected": -0.7725105285644531, + "logps/chosen": -0.9185941815376282, + "logps/rejected": -1.1244423389434814, + "loss": 1.2404, + "rewards/accuracies": 0.6328125, + "rewards/chosen": -1.8371883630752563, + "rewards/margins": 0.411696195602417, + "rewards/rejected": -2.248884677886963, + "step": 242 + }, + { + "epoch": 0.5137760158572844, + "grad_norm": 3.5861761569976807, + "learning_rate": 5.628136338511607e-07, + "logits/chosen": -0.8432914018630981, + "logits/rejected": -0.85801100730896, + "logps/chosen": -0.8873915672302246, + "logps/rejected": -1.0090795755386353, + "loss": 1.3072, + "rewards/accuracies": 0.609375, + "rewards/chosen": -1.7747831344604492, + "rewards/margins": 0.24337637424468994, + "rewards/rejected": -2.0181591510772705, + "step": 243 + }, + { + "epoch": 0.5158903204492897, + "grad_norm": 2.109071969985962, + "learning_rate": 5.591365854606829e-07, + "logits/chosen": -0.7899532318115234, + "logits/rejected": -0.7548331618309021, + "logps/chosen": -0.9333330392837524, + "logps/rejected": -1.00949227809906, + "loss": 1.3749, + "rewards/accuracies": 0.59375, + "rewards/chosen": -1.8666660785675049, + "rewards/margins": 0.1523183286190033, + "rewards/rejected": -2.01898455619812, + "step": 244 + }, + { + "epoch": 0.518004625041295, + "grad_norm": 2.2017955780029297, + "learning_rate": 5.554562905187527e-07, + "logits/chosen": -0.7569047212600708, + "logits/rejected": -0.7679808735847473, + "logps/chosen": -0.9779613614082336, + "logps/rejected": -1.1713427305221558, + "loss": 1.2628, + "rewards/accuracies": 0.609375, + "rewards/chosen": -1.9559227228164673, + "rewards/margins": 0.3867628276348114, + "rewards/rejected": -2.3426854610443115, + "step": 245 + }, + { + "epoch": 0.5201189296333003, + "grad_norm": 4.651991367340088, + "learning_rate": 5.517729510706315e-07, + "logits/chosen": -0.8546395301818848, + "logits/rejected": -0.8609369397163391, + "logps/chosen": -0.9926605224609375, + "logps/rejected": -1.1553713083267212, + "loss": 1.2812, + "rewards/accuracies": 0.6171875, + "rewards/chosen": -1.985321044921875, + "rewards/margins": 0.32542160153388977, + "rewards/rejected": -2.3107426166534424, + "step": 246 + }, + { + "epoch": 0.5222332342253055, + "grad_norm": 2.6384060382843018, + "learning_rate": 5.480867693287223e-07, + "logits/chosen": -0.7734386324882507, + "logits/rejected": -0.7963250875473022, + "logps/chosen": -0.8996341824531555, + "logps/rejected": -1.0466523170471191, + "loss": 1.2849, + "rewards/accuracies": 0.578125, + "rewards/chosen": -1.799268364906311, + "rewards/margins": 0.2940361201763153, + "rewards/rejected": -2.0933046340942383, + "step": 247 + }, + { + "epoch": 0.5243475388173109, + "grad_norm": 1.3608977794647217, + "learning_rate": 5.443979476614674e-07, + "logits/chosen": -0.7350472807884216, + "logits/rejected": -0.7215992212295532, + "logps/chosen": -0.8887076377868652, + "logps/rejected": -1.0147045850753784, + "loss": 1.3182, + "rewards/accuracies": 0.6171875, + "rewards/chosen": -1.7774152755737305, + "rewards/margins": 0.25199398398399353, + "rewards/rejected": -2.029409170150757, + "step": 248 + }, + { + "epoch": 0.5264618434093161, + "grad_norm": 3.017115354537964, + "learning_rate": 5.407066885822391e-07, + "logits/chosen": -0.827782154083252, + "logits/rejected": -0.8471929430961609, + "logps/chosen": -0.9262440800666809, + "logps/rejected": -1.1658306121826172, + "loss": 1.1882, + "rewards/accuracies": 0.6640625, + "rewards/chosen": -1.8524881601333618, + "rewards/margins": 0.47917306423187256, + "rewards/rejected": -2.3316612243652344, + "step": 249 + }, + { + "epoch": 0.5285761480013215, + "grad_norm": 0.7805312275886536, + "learning_rate": 5.370131947382214e-07, + "logits/chosen": -0.7815499305725098, + "logits/rejected": -0.8279274702072144, + "logps/chosen": -0.968708872795105, + "logps/rejected": -1.2697322368621826, + "loss": 1.2092, + "rewards/accuracies": 0.671875, + "rewards/chosen": -1.93741774559021, + "rewards/margins": 0.6020466685295105, + "rewards/rejected": -2.5394644737243652, + "step": 250 + }, + { + "epoch": 0.5306904525933267, + "grad_norm": 2.229363441467285, + "learning_rate": 5.333176688992855e-07, + "logits/chosen": -0.7824153900146484, + "logits/rejected": -0.8154900074005127, + "logps/chosen": -1.0211957693099976, + "logps/rejected": -1.2145965099334717, + "loss": 1.3074, + "rewards/accuracies": 0.609375, + "rewards/chosen": -2.042391538619995, + "rewards/margins": 0.3868010938167572, + "rewards/rejected": -2.4291930198669434, + "step": 251 + }, + { + "epoch": 0.532804757185332, + "grad_norm": 1.1359837055206299, + "learning_rate": 5.296203139468571e-07, + "logits/chosen": -0.7467613220214844, + "logits/rejected": -0.7548531889915466, + "logps/chosen": -1.0614902973175049, + "logps/rejected": -1.2674376964569092, + "loss": 1.2512, + "rewards/accuracies": 0.578125, + "rewards/chosen": -2.1229805946350098, + "rewards/margins": 0.4118950664997101, + "rewards/rejected": -2.5348753929138184, + "step": 252 + }, + { + "epoch": 0.5349190617773373, + "grad_norm": 3.0548548698425293, + "learning_rate": 5.259213328627792e-07, + "logits/chosen": -0.7868636250495911, + "logits/rejected": -0.8130850791931152, + "logps/chosen": -1.0743666887283325, + "logps/rejected": -1.2010191679000854, + "loss": 1.3275, + "rewards/accuracies": 0.578125, + "rewards/chosen": -2.148733377456665, + "rewards/margins": 0.2533051669597626, + "rewards/rejected": -2.402038335800171, + "step": 253 + }, + { + "epoch": 0.5370333663693426, + "grad_norm": 1.7205246686935425, + "learning_rate": 5.222209287181676e-07, + "logits/chosen": -0.81404709815979, + "logits/rejected": -0.8481613397598267, + "logps/chosen": -1.1599587202072144, + "logps/rejected": -1.4234716892242432, + "loss": 1.2894, + "rewards/accuracies": 0.6015625, + "rewards/chosen": -2.3199174404144287, + "rewards/margins": 0.5270256400108337, + "rewards/rejected": -2.8469433784484863, + "step": 254 + }, + { + "epoch": 0.5391476709613479, + "grad_norm": 2.2516112327575684, + "learning_rate": 5.185193046622634e-07, + "logits/chosen": -0.8112510442733765, + "logits/rejected": -0.8310728073120117, + "logps/chosen": -1.1263186931610107, + "logps/rejected": -1.3256827592849731, + "loss": 1.3552, + "rewards/accuracies": 0.5390625, + "rewards/chosen": -2.2526373863220215, + "rewards/margins": 0.39872825145721436, + "rewards/rejected": -2.6513655185699463, + "step": 255 + }, + { + "epoch": 0.5412619755533532, + "grad_norm": 2.8379359245300293, + "learning_rate": 5.148166639112799e-07, + "logits/chosen": -0.8202102184295654, + "logits/rejected": -0.845209002494812, + "logps/chosen": -1.264180302619934, + "logps/rejected": -1.6190590858459473, + "loss": 1.2083, + "rewards/accuracies": 0.625, + "rewards/chosen": -2.528360605239868, + "rewards/margins": 0.7097575068473816, + "rewards/rejected": -3.2381181716918945, + "step": 256 + }, + { + "epoch": 0.5433762801453584, + "grad_norm": 4.676355838775635, + "learning_rate": 5.111132097372459e-07, + "logits/chosen": -0.8866451978683472, + "logits/rejected": -0.8642281889915466, + "logps/chosen": -1.3194389343261719, + "logps/rejected": -1.4506916999816895, + "loss": 1.4002, + "rewards/accuracies": 0.59375, + "rewards/chosen": -2.6388778686523438, + "rewards/margins": 0.2625058591365814, + "rewards/rejected": -2.901383399963379, + "step": 257 + }, + { + "epoch": 0.5454905847373638, + "grad_norm": 2.55251407623291, + "learning_rate": 5.074091454568463e-07, + "logits/chosen": -0.7903708815574646, + "logits/rejected": -0.8010709881782532, + "logps/chosen": -1.3550961017608643, + "logps/rejected": -1.661428451538086, + "loss": 1.2131, + "rewards/accuracies": 0.65625, + "rewards/chosen": -2.7101922035217285, + "rewards/margins": 0.6126645803451538, + "rewards/rejected": -3.322856903076172, + "step": 258 + }, + { + "epoch": 0.547604889329369, + "grad_norm": 4.116572856903076, + "learning_rate": 5.037046744202611e-07, + "logits/chosen": -0.7501232624053955, + "logits/rejected": -0.7825176119804382, + "logps/chosen": -1.2111856937408447, + "logps/rejected": -1.5176191329956055, + "loss": 1.1345, + "rewards/accuracies": 0.71875, + "rewards/chosen": -2.4223713874816895, + "rewards/margins": 0.6128667593002319, + "rewards/rejected": -3.035238265991211, + "step": 259 + }, + { + "epoch": 0.5497191939213742, + "grad_norm": 2.0285205841064453, + "learning_rate": 5e-07, + "logits/chosen": -0.8355445861816406, + "logits/rejected": -0.8497716784477234, + "logps/chosen": -1.1876304149627686, + "logps/rejected": -1.4788450002670288, + "loss": 1.1559, + "rewards/accuracies": 0.671875, + "rewards/chosen": -2.375260829925537, + "rewards/margins": 0.5824294090270996, + "rewards/rejected": -2.9576900005340576, + "step": 260 + }, + { + "epoch": 0.5518334985133796, + "grad_norm": 4.681185245513916, + "learning_rate": 4.962953255797389e-07, + "logits/chosen": -0.8240503072738647, + "logits/rejected": -0.8016488552093506, + "logps/chosen": -1.2238959074020386, + "logps/rejected": -1.4727882146835327, + "loss": 1.2914, + "rewards/accuracies": 0.5546875, + "rewards/chosen": -2.447791814804077, + "rewards/margins": 0.49778467416763306, + "rewards/rejected": -2.9455764293670654, + "step": 261 + }, + { + "epoch": 0.5539478031053848, + "grad_norm": 5.15679931640625, + "learning_rate": 4.925908545431537e-07, + "logits/chosen": -0.728940486907959, + "logits/rejected": -0.7355924248695374, + "logps/chosen": -1.3356778621673584, + "logps/rejected": -1.6726096868515015, + "loss": 1.1434, + "rewards/accuracies": 0.6953125, + "rewards/chosen": -2.671355724334717, + "rewards/margins": 0.6738637685775757, + "rewards/rejected": -3.345219373703003, + "step": 262 + }, + { + "epoch": 0.5560621076973902, + "grad_norm": 2.481048345565796, + "learning_rate": 4.888867902627543e-07, + "logits/chosen": -0.8311591148376465, + "logits/rejected": -0.8191719055175781, + "logps/chosen": -1.2743335962295532, + "logps/rejected": -1.5339927673339844, + "loss": 1.2164, + "rewards/accuracies": 0.6640625, + "rewards/chosen": -2.5486671924591064, + "rewards/margins": 0.5193185210227966, + "rewards/rejected": -3.0679855346679688, + "step": 263 + }, + { + "epoch": 0.5581764122893954, + "grad_norm": 3.6758291721343994, + "learning_rate": 4.851833360887201e-07, + "logits/chosen": -0.6787989735603333, + "logits/rejected": -0.668928325176239, + "logps/chosen": -1.2278664112091064, + "logps/rejected": -1.4955706596374512, + "loss": 1.1942, + "rewards/accuracies": 0.7109375, + "rewards/chosen": -2.455732822418213, + "rewards/margins": 0.535408616065979, + "rewards/rejected": -2.9911413192749023, + "step": 264 + }, + { + "epoch": 0.5602907168814008, + "grad_norm": 2.7282023429870605, + "learning_rate": 4.814806953377365e-07, + "logits/chosen": -0.7772133350372314, + "logits/rejected": -0.7689889073371887, + "logps/chosen": -1.1954048871994019, + "logps/rejected": -1.444943904876709, + "loss": 1.2686, + "rewards/accuracies": 0.59375, + "rewards/chosen": -2.3908097743988037, + "rewards/margins": 0.4990782141685486, + "rewards/rejected": -2.889887809753418, + "step": 265 + }, + { + "epoch": 0.562405021473406, + "grad_norm": 2.8753116130828857, + "learning_rate": 4.777790712818323e-07, + "logits/chosen": -0.6946043968200684, + "logits/rejected": -0.7001516819000244, + "logps/chosen": -1.2844620943069458, + "logps/rejected": -1.486103892326355, + "loss": 1.284, + "rewards/accuracies": 0.5703125, + "rewards/chosen": -2.5689241886138916, + "rewards/margins": 0.4032836854457855, + "rewards/rejected": -2.97220778465271, + "step": 266 + }, + { + "epoch": 0.5645193260654113, + "grad_norm": 1.5583593845367432, + "learning_rate": 4.740786671372209e-07, + "logits/chosen": -0.7396820187568665, + "logits/rejected": -0.7129873037338257, + "logps/chosen": -1.410097599029541, + "logps/rejected": -1.6091456413269043, + "loss": 1.3158, + "rewards/accuracies": 0.640625, + "rewards/chosen": -2.820195198059082, + "rewards/margins": 0.3980959951877594, + "rewards/rejected": -3.2182912826538086, + "step": 267 + }, + { + "epoch": 0.5666336306574166, + "grad_norm": 3.5984952449798584, + "learning_rate": 4.703796860531429e-07, + "logits/chosen": -0.7031830549240112, + "logits/rejected": -0.700330376625061, + "logps/chosen": -1.633664608001709, + "logps/rejected": -1.9186874628067017, + "loss": 1.2479, + "rewards/accuracies": 0.6796875, + "rewards/chosen": -3.267329216003418, + "rewards/margins": 0.5700456500053406, + "rewards/rejected": -3.8373749256134033, + "step": 268 + }, + { + "epoch": 0.5687479352494219, + "grad_norm": 6.295733451843262, + "learning_rate": 4.666823311007144e-07, + "logits/chosen": -0.8001950979232788, + "logits/rejected": -0.8042099475860596, + "logps/chosen": -1.5675832033157349, + "logps/rejected": -1.9247075319290161, + "loss": 1.1759, + "rewards/accuracies": 0.625, + "rewards/chosen": -3.1351664066314697, + "rewards/margins": 0.7142485976219177, + "rewards/rejected": -3.8494150638580322, + "step": 269 + }, + { + "epoch": 0.5708622398414271, + "grad_norm": 3.6349036693573, + "learning_rate": 4.6298680526177855e-07, + "logits/chosen": -0.8108068704605103, + "logits/rejected": -0.8030902147293091, + "logps/chosen": -1.8205997943878174, + "logps/rejected": -2.195197105407715, + "loss": 1.1864, + "rewards/accuracies": 0.6796875, + "rewards/chosen": -3.6411995887756348, + "rewards/margins": 0.7491948008537292, + "rewards/rejected": -4.39039421081543, + "step": 270 + }, + { + "epoch": 0.5729765444334325, + "grad_norm": 4.786395072937012, + "learning_rate": 4.59293311417761e-07, + "logits/chosen": -0.798182487487793, + "logits/rejected": -0.7736828327178955, + "logps/chosen": -1.8617057800292969, + "logps/rejected": -2.08984637260437, + "loss": 1.3947, + "rewards/accuracies": 0.6328125, + "rewards/chosen": -3.7234115600585938, + "rewards/margins": 0.4562810957431793, + "rewards/rejected": -4.17969274520874, + "step": 271 + }, + { + "epoch": 0.5750908490254377, + "grad_norm": 6.7946457862854, + "learning_rate": 4.556020523385326e-07, + "logits/chosen": -0.7530428171157837, + "logits/rejected": -0.7395590543746948, + "logps/chosen": -1.8709862232208252, + "logps/rejected": -2.3599390983581543, + "loss": 1.1025, + "rewards/accuracies": 0.6875, + "rewards/chosen": -3.7419724464416504, + "rewards/margins": 0.9779053926467896, + "rewards/rejected": -4.719878196716309, + "step": 272 + }, + { + "epoch": 0.5772051536174431, + "grad_norm": 4.877624988555908, + "learning_rate": 4.5191323067127773e-07, + "logits/chosen": -0.7732480764389038, + "logits/rejected": -0.7835702300071716, + "logps/chosen": -2.0340800285339355, + "logps/rejected": -2.330742835998535, + "loss": 1.3198, + "rewards/accuracies": 0.6328125, + "rewards/chosen": -4.068160057067871, + "rewards/margins": 0.5933258533477783, + "rewards/rejected": -4.66148567199707, + "step": 273 + }, + { + "epoch": 0.5793194582094483, + "grad_norm": 9.001680374145508, + "learning_rate": 4.482270489293685e-07, + "logits/chosen": -0.9062263369560242, + "logits/rejected": -0.9105854630470276, + "logps/chosen": -2.1364972591400146, + "logps/rejected": -2.4467523097991943, + "loss": 1.3464, + "rewards/accuracies": 0.6328125, + "rewards/chosen": -4.272994518280029, + "rewards/margins": 0.6205099821090698, + "rewards/rejected": -4.893504619598389, + "step": 274 + }, + { + "epoch": 0.5814337628014535, + "grad_norm": 2.811025619506836, + "learning_rate": 4.445437094812475e-07, + "logits/chosen": -0.8593579530715942, + "logits/rejected": -0.8343831896781921, + "logps/chosen": -2.452843189239502, + "logps/rejected": -2.7551848888397217, + "loss": 1.3536, + "rewards/accuracies": 0.5859375, + "rewards/chosen": -4.905686378479004, + "rewards/margins": 0.6046838760375977, + "rewards/rejected": -5.510369777679443, + "step": 275 + }, + { + "epoch": 0.5835480673934589, + "grad_norm": 2.2030158042907715, + "learning_rate": 4.4086341453931714e-07, + "logits/chosen": -0.8991417288780212, + "logits/rejected": -0.8766486644744873, + "logps/chosen": -2.30641508102417, + "logps/rejected": -2.7606654167175293, + "loss": 1.1708, + "rewards/accuracies": 0.6875, + "rewards/chosen": -4.61283016204834, + "rewards/margins": 0.9085015654563904, + "rewards/rejected": -5.521330833435059, + "step": 276 + }, + { + "epoch": 0.5856623719854641, + "grad_norm": 5.5185227394104, + "learning_rate": 4.371863661488393e-07, + "logits/chosen": -0.8738227486610413, + "logits/rejected": -0.8665530681610107, + "logps/chosen": -2.29125714302063, + "logps/rejected": -2.7014153003692627, + "loss": 1.1883, + "rewards/accuracies": 0.6484375, + "rewards/chosen": -4.58251428604126, + "rewards/margins": 0.8203167915344238, + "rewards/rejected": -5.402830600738525, + "step": 277 + }, + { + "epoch": 0.5877766765774695, + "grad_norm": 2.0779521465301514, + "learning_rate": 4.3351276617684285e-07, + "logits/chosen": -0.958415150642395, + "logits/rejected": -0.9585077166557312, + "logps/chosen": -2.4368410110473633, + "logps/rejected": -2.798506736755371, + "loss": 1.1749, + "rewards/accuracies": 0.6484375, + "rewards/chosen": -4.873682022094727, + "rewards/margins": 0.7233313322067261, + "rewards/rejected": -5.597013473510742, + "step": 278 + }, + { + "epoch": 0.5898909811694747, + "grad_norm": 2.884877920150757, + "learning_rate": 4.29842816301041e-07, + "logits/chosen": -0.9413051605224609, + "logits/rejected": -0.9224691987037659, + "logps/chosen": -2.485034942626953, + "logps/rejected": -2.911332368850708, + "loss": 1.2035, + "rewards/accuracies": 0.6171875, + "rewards/chosen": -4.970069885253906, + "rewards/margins": 0.8525944948196411, + "rewards/rejected": -5.822664737701416, + "step": 279 + }, + { + "epoch": 0.59200528576148, + "grad_norm": 5.203248500823975, + "learning_rate": 4.2617671799875944e-07, + "logits/chosen": -0.9359334111213684, + "logits/rejected": -0.9387660026550293, + "logps/chosen": -2.378349542617798, + "logps/rejected": -2.730886936187744, + "loss": 1.2253, + "rewards/accuracies": 0.7109375, + "rewards/chosen": -4.756699085235596, + "rewards/margins": 0.7050745487213135, + "rewards/rejected": -5.461773872375488, + "step": 280 + }, + { + "epoch": 0.5941195903534853, + "grad_norm": 6.818525314331055, + "learning_rate": 4.225146725358758e-07, + "logits/chosen": -0.8864554166793823, + "logits/rejected": -0.8813320398330688, + "logps/chosen": -2.4233975410461426, + "logps/rejected": -2.8188178539276123, + "loss": 1.2281, + "rewards/accuracies": 0.6640625, + "rewards/chosen": -4.846795082092285, + "rewards/margins": 0.7908411622047424, + "rewards/rejected": -5.637635707855225, + "step": 281 + }, + { + "epoch": 0.5962338949454906, + "grad_norm": 2.529154062271118, + "learning_rate": 4.1885688095577e-07, + "logits/chosen": -0.8420325517654419, + "logits/rejected": -0.8822402954101562, + "logps/chosen": -2.626488447189331, + "logps/rejected": -3.1887192726135254, + "loss": 1.0561, + "rewards/accuracies": 0.6953125, + "rewards/chosen": -5.252976894378662, + "rewards/margins": 1.1244611740112305, + "rewards/rejected": -6.377438545227051, + "step": 282 + }, + { + "epoch": 0.5983481995374959, + "grad_norm": 3.0739686489105225, + "learning_rate": 4.152035440682873e-07, + "logits/chosen": -0.8550993204116821, + "logits/rejected": -0.8528580665588379, + "logps/chosen": -2.6387887001037598, + "logps/rejected": -2.9952192306518555, + "loss": 1.3409, + "rewards/accuracies": 0.6015625, + "rewards/chosen": -5.2775774002075195, + "rewards/margins": 0.7128612399101257, + "rewards/rejected": -5.990438461303711, + "step": 283 + }, + { + "epoch": 0.6004625041295012, + "grad_norm": 3.6649062633514404, + "learning_rate": 4.1155486243871363e-07, + "logits/chosen": -0.8643282651901245, + "logits/rejected": -0.9175342321395874, + "logps/chosen": -2.929072618484497, + "logps/rejected": -3.105940580368042, + "loss": 1.5121, + "rewards/accuracies": 0.578125, + "rewards/chosen": -5.858145236968994, + "rewards/margins": 0.3537355065345764, + "rewards/rejected": -6.211881160736084, + "step": 284 + }, + { + "epoch": 0.6025768087215064, + "grad_norm": 2.5071723461151123, + "learning_rate": 4.0791103637676486e-07, + "logits/chosen": -0.8368631601333618, + "logits/rejected": -0.819808304309845, + "logps/chosen": -3.0672600269317627, + "logps/rejected": -3.4685003757476807, + "loss": 1.3236, + "rewards/accuracies": 0.65625, + "rewards/chosen": -6.134520053863525, + "rewards/margins": 0.8024805784225464, + "rewards/rejected": -6.937000751495361, + "step": 285 + }, + { + "epoch": 0.6046911133135118, + "grad_norm": 8.780280113220215, + "learning_rate": 4.042722659255906e-07, + "logits/chosen": -0.8249569535255432, + "logits/rejected": -0.8442113995552063, + "logps/chosen": -3.3199872970581055, + "logps/rejected": -3.7276291847229004, + "loss": 1.322, + "rewards/accuracies": 0.6796875, + "rewards/chosen": -6.639974594116211, + "rewards/margins": 0.8152831792831421, + "rewards/rejected": -7.455258369445801, + "step": 286 + }, + { + "epoch": 0.606805417905517, + "grad_norm": 3.4388678073883057, + "learning_rate": 4.006387508507914e-07, + "logits/chosen": -0.7224047780036926, + "logits/rejected": -0.7616450786590576, + "logps/chosen": -2.9411330223083496, + "logps/rejected": -3.32680082321167, + "loss": 1.2868, + "rewards/accuracies": 0.6640625, + "rewards/chosen": -5.882266044616699, + "rewards/margins": 0.7713361978530884, + "rewards/rejected": -6.65360164642334, + "step": 287 + }, + { + "epoch": 0.6089197224975224, + "grad_norm": 5.095273971557617, + "learning_rate": 3.970106906294509e-07, + "logits/chosen": -0.7394692897796631, + "logits/rejected": -0.7316830158233643, + "logps/chosen": -2.9902045726776123, + "logps/rejected": -3.469916820526123, + "loss": 1.1694, + "rewards/accuracies": 0.6953125, + "rewards/chosen": -5.980409145355225, + "rewards/margins": 0.9594244360923767, + "rewards/rejected": -6.939833641052246, + "step": 288 + }, + { + "epoch": 0.6110340270895276, + "grad_norm": 2.1398613452911377, + "learning_rate": 3.933882844391866e-07, + "logits/chosen": -0.8181312084197998, + "logits/rejected": -0.833306610584259, + "logps/chosen": -3.0137529373168945, + "logps/rejected": -3.4241840839385986, + "loss": 1.2453, + "rewards/accuracies": 0.609375, + "rewards/chosen": -6.027505874633789, + "rewards/margins": 0.8208625316619873, + "rewards/rejected": -6.848368167877197, + "step": 289 + }, + { + "epoch": 0.6131483316815328, + "grad_norm": 4.185284614562988, + "learning_rate": 3.89771731147214e-07, + "logits/chosen": -0.7805104851722717, + "logits/rejected": -0.8086984753608704, + "logps/chosen": -2.984957218170166, + "logps/rejected": -3.430112361907959, + "loss": 1.2671, + "rewards/accuracies": 0.5859375, + "rewards/chosen": -5.969914436340332, + "rewards/margins": 0.890310525894165, + "rewards/rejected": -6.860224723815918, + "step": 290 + }, + { + "epoch": 0.6152626362735382, + "grad_norm": 7.104829788208008, + "learning_rate": 3.861612292994292e-07, + "logits/chosen": -0.7788286209106445, + "logits/rejected": -0.8027424216270447, + "logps/chosen": -2.896563768386841, + "logps/rejected": -3.1082046031951904, + "loss": 1.4853, + "rewards/accuracies": 0.578125, + "rewards/chosen": -5.793127536773682, + "rewards/margins": 0.42328107357025146, + "rewards/rejected": -6.216409206390381, + "step": 291 + }, + { + "epoch": 0.6173769408655434, + "grad_norm": 3.795579433441162, + "learning_rate": 3.825569771095082e-07, + "logits/chosen": -0.8044757843017578, + "logits/rejected": -0.7828265428543091, + "logps/chosen": -2.8059256076812744, + "logps/rejected": -3.3121094703674316, + "loss": 1.1299, + "rewards/accuracies": 0.6875, + "rewards/chosen": -5.611851215362549, + "rewards/margins": 1.0123679637908936, + "rewards/rejected": -6.624218940734863, + "step": 292 + }, + { + "epoch": 0.6194912454575487, + "grad_norm": 4.486142158508301, + "learning_rate": 3.7895917244802655e-07, + "logits/chosen": -0.7511788606643677, + "logits/rejected": -0.7885503768920898, + "logps/chosen": -2.927251100540161, + "logps/rejected": -3.1605303287506104, + "loss": 1.426, + "rewards/accuracies": 0.6015625, + "rewards/chosen": -5.854502201080322, + "rewards/margins": 0.466558575630188, + "rewards/rejected": -6.321060657501221, + "step": 293 + }, + { + "epoch": 0.621605550049554, + "grad_norm": 3.3942787647247314, + "learning_rate": 3.753680128315952e-07, + "logits/chosen": -0.8230300545692444, + "logits/rejected": -0.8042524456977844, + "logps/chosen": -2.524353504180908, + "logps/rejected": -2.8687357902526855, + "loss": 1.2653, + "rewards/accuracies": 0.6640625, + "rewards/chosen": -5.048707008361816, + "rewards/margins": 0.6887640953063965, + "rewards/rejected": -5.737471580505371, + "step": 294 + }, + { + "epoch": 0.6237198546415593, + "grad_norm": 4.326812744140625, + "learning_rate": 3.717836954120178e-07, + "logits/chosen": -0.7763381004333496, + "logits/rejected": -0.7852378487586975, + "logps/chosen": -2.4861948490142822, + "logps/rejected": -2.8822267055511475, + "loss": 1.124, + "rewards/accuracies": 0.75, + "rewards/chosen": -4.9723896980285645, + "rewards/margins": 0.7920635938644409, + "rewards/rejected": -5.764453411102295, + "step": 295 + }, + { + "epoch": 0.6258341592335646, + "grad_norm": 3.886293649673462, + "learning_rate": 3.6820641696546627e-07, + "logits/chosen": -0.8350138664245605, + "logits/rejected": -0.8594292998313904, + "logps/chosen": -2.1301493644714355, + "logps/rejected": -2.3678014278411865, + "loss": 1.3532, + "rewards/accuracies": 0.5859375, + "rewards/chosen": -4.260298728942871, + "rewards/margins": 0.4753049314022064, + "rewards/rejected": -4.735602855682373, + "step": 296 + }, + { + "epoch": 0.6279484638255699, + "grad_norm": 1.9318888187408447, + "learning_rate": 3.6463637388167875e-07, + "logits/chosen": -0.812870979309082, + "logits/rejected": -0.8393633961677551, + "logps/chosen": -2.0607728958129883, + "logps/rejected": -2.4457521438598633, + "loss": 1.2317, + "rewards/accuracies": 0.609375, + "rewards/chosen": -4.121545791625977, + "rewards/margins": 0.76995849609375, + "rewards/rejected": -4.891504287719727, + "step": 297 + }, + { + "epoch": 0.6300627684175751, + "grad_norm": 2.731139898300171, + "learning_rate": 3.610737621531781e-07, + "logits/chosen": -0.7860711216926575, + "logits/rejected": -0.8006534576416016, + "logps/chosen": -1.9324530363082886, + "logps/rejected": -2.2838711738586426, + "loss": 1.2986, + "rewards/accuracies": 0.65625, + "rewards/chosen": -3.864906072616577, + "rewards/margins": 0.7028359174728394, + "rewards/rejected": -4.567742347717285, + "step": 298 + }, + { + "epoch": 0.6321770730095805, + "grad_norm": 3.118441581726074, + "learning_rate": 3.575187773645112e-07, + "logits/chosen": -0.6946629285812378, + "logits/rejected": -0.6832380294799805, + "logps/chosen": -2.2569775581359863, + "logps/rejected": -2.6153128147125244, + "loss": 1.2166, + "rewards/accuracies": 0.6171875, + "rewards/chosen": -4.513955116271973, + "rewards/margins": 0.7166703343391418, + "rewards/rejected": -5.230625629425049, + "step": 299 + }, + { + "epoch": 0.6342913776015857, + "grad_norm": 4.998100757598877, + "learning_rate": 3.5397161468151214e-07, + "logits/chosen": -0.7972643375396729, + "logits/rejected": -0.7864660620689392, + "logps/chosen": -2.227022886276245, + "logps/rejected": -2.57175350189209, + "loss": 1.2796, + "rewards/accuracies": 0.6328125, + "rewards/chosen": -4.45404577255249, + "rewards/margins": 0.6894608736038208, + "rewards/rejected": -5.14350700378418, + "step": 300 + }, + { + "epoch": 0.6364056821935911, + "grad_norm": 6.259451866149902, + "learning_rate": 3.5043246884058777e-07, + "logits/chosen": -0.6282143592834473, + "logits/rejected": -0.6314865350723267, + "logps/chosen": -2.4372308254241943, + "logps/rejected": -2.8582205772399902, + "loss": 1.1592, + "rewards/accuracies": 0.625, + "rewards/chosen": -4.874461650848389, + "rewards/margins": 0.8419792056083679, + "rewards/rejected": -5.7164411544799805, + "step": 301 + }, + { + "epoch": 0.6385199867855963, + "grad_norm": 2.577531337738037, + "learning_rate": 3.4690153413802653e-07, + "logits/chosen": -0.658220648765564, + "logits/rejected": -0.6330516934394836, + "logps/chosen": -2.6647050380706787, + "logps/rejected": -3.1917996406555176, + "loss": 1.2609, + "rewards/accuracies": 0.671875, + "rewards/chosen": -5.329410076141357, + "rewards/margins": 1.0541892051696777, + "rewards/rejected": -6.383599281311035, + "step": 302 + }, + { + "epoch": 0.6406342913776016, + "grad_norm": 4.733935356140137, + "learning_rate": 3.4337900441933227e-07, + "logits/chosen": -0.5048555731773376, + "logits/rejected": -0.45112305879592896, + "logps/chosen": -2.5193920135498047, + "logps/rejected": -3.1279971599578857, + "loss": 1.0648, + "rewards/accuracies": 0.765625, + "rewards/chosen": -5.038784027099609, + "rewards/margins": 1.2172104120254517, + "rewards/rejected": -6.2559943199157715, + "step": 303 + }, + { + "epoch": 0.6427485959696069, + "grad_norm": 5.54962158203125, + "learning_rate": 3.3986507306858125e-07, + "logits/chosen": -0.5305406451225281, + "logits/rejected": -0.5246613025665283, + "logps/chosen": -2.8851962089538574, + "logps/rejected": -3.248018264770508, + "loss": 1.4329, + "rewards/accuracies": 0.625, + "rewards/chosen": -5.770392417907715, + "rewards/margins": 0.7256444692611694, + "rewards/rejected": -6.496036529541016, + "step": 304 + }, + { + "epoch": 0.6448629005616121, + "grad_norm": 2.827944278717041, + "learning_rate": 3.363599329978066e-07, + "logits/chosen": -0.4795135259628296, + "logits/rejected": -0.4911767244338989, + "logps/chosen": -3.0268373489379883, + "logps/rejected": -3.4411511421203613, + "loss": 1.4083, + "rewards/accuracies": 0.65625, + "rewards/chosen": -6.053674697875977, + "rewards/margins": 0.8286278247833252, + "rewards/rejected": -6.882302284240723, + "step": 305 + }, + { + "epoch": 0.6469772051536175, + "grad_norm": 5.35672664642334, + "learning_rate": 3.328637766364075e-07, + "logits/chosen": -0.4823904037475586, + "logits/rejected": -0.48555058240890503, + "logps/chosen": -2.990793466567993, + "logps/rejected": -3.529240846633911, + "loss": 1.1417, + "rewards/accuracies": 0.6796875, + "rewards/chosen": -5.981586933135986, + "rewards/margins": 1.0768945217132568, + "rewards/rejected": -7.058481693267822, + "step": 306 + }, + { + "epoch": 0.6490915097456227, + "grad_norm": 2.8072359561920166, + "learning_rate": 3.2937679592058396e-07, + "logits/chosen": -0.4903571605682373, + "logits/rejected": -0.46411609649658203, + "logps/chosen": -2.8665530681610107, + "logps/rejected": -3.542123556137085, + "loss": 1.2485, + "rewards/accuracies": 0.6640625, + "rewards/chosen": -5.7331061363220215, + "rewards/margins": 1.3511409759521484, + "rewards/rejected": -7.08424711227417, + "step": 307 + }, + { + "epoch": 0.651205814337628, + "grad_norm": 6.341434478759766, + "learning_rate": 3.2589918228280066e-07, + "logits/chosen": -0.4496378004550934, + "logits/rejected": -0.35389459133148193, + "logps/chosen": -2.8208916187286377, + "logps/rejected": -3.326601505279541, + "loss": 1.3089, + "rewards/accuracies": 0.6171875, + "rewards/chosen": -5.641783237457275, + "rewards/margins": 1.011419653892517, + "rewards/rejected": -6.653203010559082, + "step": 308 + }, + { + "epoch": 0.6533201189296333, + "grad_norm": 2.5416784286499023, + "learning_rate": 3.2243112664127723e-07, + "logits/chosen": -0.44504135847091675, + "logits/rejected": -0.42088568210601807, + "logps/chosen": -2.7710533142089844, + "logps/rejected": -3.4406185150146484, + "loss": 1.2213, + "rewards/accuracies": 0.6640625, + "rewards/chosen": -5.542106628417969, + "rewards/margins": 1.3391309976577759, + "rewards/rejected": -6.881237030029297, + "step": 309 + }, + { + "epoch": 0.6554344235216386, + "grad_norm": 4.573229789733887, + "learning_rate": 3.189728193895069e-07, + "logits/chosen": -0.31100764870643616, + "logits/rejected": -0.32552966475486755, + "logps/chosen": -3.099289655685425, + "logps/rejected": -3.5152204036712646, + "loss": 1.3571, + "rewards/accuracies": 0.6015625, + "rewards/chosen": -6.19857931137085, + "rewards/margins": 0.8318620324134827, + "rewards/rejected": -7.030440807342529, + "step": 310 + }, + { + "epoch": 0.6575487281136438, + "grad_norm": 3.7587928771972656, + "learning_rate": 3.155244503858041e-07, + "logits/chosen": -0.4225979447364807, + "logits/rejected": -0.43882372975349426, + "logps/chosen": -2.9082608222961426, + "logps/rejected": -3.2239482402801514, + "loss": 1.3415, + "rewards/accuracies": 0.6953125, + "rewards/chosen": -5.816521644592285, + "rewards/margins": 0.6313749551773071, + "rewards/rejected": -6.447896480560303, + "step": 311 + }, + { + "epoch": 0.6596630327056492, + "grad_norm": 5.79728889465332, + "learning_rate": 3.12086208942881e-07, + "logits/chosen": -0.48076939582824707, + "logits/rejected": -0.41990721225738525, + "logps/chosen": -2.7089650630950928, + "logps/rejected": -3.29990291595459, + "loss": 1.1423, + "rewards/accuracies": 0.6640625, + "rewards/chosen": -5.4179301261901855, + "rewards/margins": 1.181876540184021, + "rewards/rejected": -6.59980583190918, + "step": 312 + }, + { + "epoch": 0.6617773372976544, + "grad_norm": 7.405224800109863, + "learning_rate": 3.086582838174551e-07, + "logits/chosen": -0.48003631830215454, + "logits/rejected": -0.40571871399879456, + "logps/chosen": -2.53741455078125, + "logps/rejected": -3.0145747661590576, + "loss": 1.3247, + "rewards/accuracies": 0.609375, + "rewards/chosen": -5.0748291015625, + "rewards/margins": 0.9543203115463257, + "rewards/rejected": -6.029149532318115, + "step": 313 + }, + { + "epoch": 0.6638916418896598, + "grad_norm": 6.371465682983398, + "learning_rate": 3.052408631998863e-07, + "logits/chosen": -0.42537638545036316, + "logits/rejected": -0.39384835958480835, + "logps/chosen": -3.006593942642212, + "logps/rejected": -3.4665465354919434, + "loss": 1.2648, + "rewards/accuracies": 0.6328125, + "rewards/chosen": -6.013187885284424, + "rewards/margins": 0.919904887676239, + "rewards/rejected": -6.933093070983887, + "step": 314 + }, + { + "epoch": 0.666005946481665, + "grad_norm": 4.65411376953125, + "learning_rate": 3.018341347038453e-07, + "logits/chosen": -0.38848310708999634, + "logits/rejected": -0.3435167670249939, + "logps/chosen": -2.9562084674835205, + "logps/rejected": -3.5491316318511963, + "loss": 1.1353, + "rewards/accuracies": 0.6796875, + "rewards/chosen": -5.912416934967041, + "rewards/margins": 1.1858452558517456, + "rewards/rejected": -7.098263263702393, + "step": 315 + }, + { + "epoch": 0.6681202510736703, + "grad_norm": 5.089771747589111, + "learning_rate": 2.9843828535601397e-07, + "logits/chosen": -0.3452882170677185, + "logits/rejected": -0.29303884506225586, + "logps/chosen": -2.5367987155914307, + "logps/rejected": -3.172724723815918, + "loss": 1.2002, + "rewards/accuracies": 0.6875, + "rewards/chosen": -5.073597431182861, + "rewards/margins": 1.2718524932861328, + "rewards/rejected": -6.345449447631836, + "step": 316 + }, + { + "epoch": 0.6702345556656756, + "grad_norm": 4.480255603790283, + "learning_rate": 2.9505350158581697e-07, + "logits/chosen": -0.47401517629623413, + "logits/rejected": -0.45950815081596375, + "logps/chosen": -2.45076322555542, + "logps/rejected": -2.998079299926758, + "loss": 1.2545, + "rewards/accuracies": 0.6171875, + "rewards/chosen": -4.90152645111084, + "rewards/margins": 1.0946320295333862, + "rewards/rejected": -5.996158599853516, + "step": 317 + }, + { + "epoch": 0.6723488602576809, + "grad_norm": 3.6318399906158447, + "learning_rate": 2.916799692151884e-07, + "logits/chosen": -0.20774951577186584, + "logits/rejected": -0.21114808320999146, + "logps/chosen": -2.8932981491088867, + "logps/rejected": -3.613022565841675, + "loss": 1.1187, + "rewards/accuracies": 0.6953125, + "rewards/chosen": -5.786596298217773, + "rewards/margins": 1.4394491910934448, + "rewards/rejected": -7.22604513168335, + "step": 318 + }, + { + "epoch": 0.6744631648496862, + "grad_norm": 6.601771831512451, + "learning_rate": 2.883178734483692e-07, + "logits/chosen": -0.3821495473384857, + "logits/rejected": -0.35181915760040283, + "logps/chosen": -2.5047662258148193, + "logps/rejected": -3.074918270111084, + "loss": 1.1545, + "rewards/accuracies": 0.71875, + "rewards/chosen": -5.009532451629639, + "rewards/margins": 1.1403042078018188, + "rewards/rejected": -6.149836540222168, + "step": 319 + }, + { + "epoch": 0.6765774694416914, + "grad_norm": 3.077775716781616, + "learning_rate": 2.849673988617399e-07, + "logits/chosen": -0.4517952799797058, + "logits/rejected": -0.3880998194217682, + "logps/chosen": -2.5404443740844727, + "logps/rejected": -3.007855176925659, + "loss": 1.2441, + "rewards/accuracies": 0.6484375, + "rewards/chosen": -5.080888748168945, + "rewards/margins": 0.9348208904266357, + "rewards/rejected": -6.015710353851318, + "step": 320 + }, + { + "epoch": 0.6786917740336967, + "grad_norm": 4.130971908569336, + "learning_rate": 2.8162872939368674e-07, + "logits/chosen": -0.3455219566822052, + "logits/rejected": -0.3199109137058258, + "logps/chosen": -2.5115320682525635, + "logps/rejected": -3.0809438228607178, + "loss": 1.1814, + "rewards/accuracies": 0.71875, + "rewards/chosen": -5.023064136505127, + "rewards/margins": 1.1388237476348877, + "rewards/rejected": -6.1618876457214355, + "step": 321 + }, + { + "epoch": 0.680806078625702, + "grad_norm": 6.414750099182129, + "learning_rate": 2.783020483345057e-07, + "logits/chosen": -0.500693142414093, + "logits/rejected": -0.43053722381591797, + "logps/chosen": -2.627499580383301, + "logps/rejected": -3.176882266998291, + "loss": 1.2207, + "rewards/accuracies": 0.6015625, + "rewards/chosen": -5.254999160766602, + "rewards/margins": 1.0987658500671387, + "rewards/rejected": -6.353764533996582, + "step": 322 + }, + { + "epoch": 0.6829203832177073, + "grad_norm": 3.8955185413360596, + "learning_rate": 2.749875383163377e-07, + "logits/chosen": -0.3386150896549225, + "logits/rejected": -0.3456903100013733, + "logps/chosen": -2.5545601844787598, + "logps/rejected": -3.0574111938476562, + "loss": 1.2667, + "rewards/accuracies": 0.6796875, + "rewards/chosen": -5.1091203689575195, + "rewards/margins": 1.0057018995285034, + "rewards/rejected": -6.1148223876953125, + "step": 323 + }, + { + "epoch": 0.6850346878097126, + "grad_norm": 4.244959831237793, + "learning_rate": 2.7168538130314345e-07, + "logits/chosen": -0.4657687246799469, + "logits/rejected": -0.41878795623779297, + "logps/chosen": -2.3406989574432373, + "logps/rejected": -2.74613094329834, + "loss": 1.2982, + "rewards/accuracies": 0.640625, + "rewards/chosen": -4.681397914886475, + "rewards/margins": 0.8108637928962708, + "rewards/rejected": -5.49226188659668, + "step": 324 + }, + { + "epoch": 0.6871489924017179, + "grad_norm": 8.914139747619629, + "learning_rate": 2.683957585807136e-07, + "logits/chosen": -0.42120760679244995, + "logits/rejected": -0.34997111558914185, + "logps/chosen": -2.4362924098968506, + "logps/rejected": -2.8844237327575684, + "loss": 1.3185, + "rewards/accuracies": 0.6484375, + "rewards/chosen": -4.872584819793701, + "rewards/margins": 0.8962627649307251, + "rewards/rejected": -5.768847465515137, + "step": 325 + }, + { + "epoch": 0.6892632969937231, + "grad_norm": 2.8318073749542236, + "learning_rate": 2.651188507467161e-07, + "logits/chosen": -0.4435175657272339, + "logits/rejected": -0.43688836693763733, + "logps/chosen": -2.316673994064331, + "logps/rejected": -2.6802306175231934, + "loss": 1.2727, + "rewards/accuracies": 0.65625, + "rewards/chosen": -4.633347988128662, + "rewards/margins": 0.7271134853363037, + "rewards/rejected": -5.360461235046387, + "step": 326 + }, + { + "epoch": 0.6913776015857285, + "grad_norm": 9.15845012664795, + "learning_rate": 2.618548377007817e-07, + "logits/chosen": -0.4659804105758667, + "logits/rejected": -0.43525823950767517, + "logps/chosen": -2.3177073001861572, + "logps/rejected": -2.674837350845337, + "loss": 1.3204, + "rewards/accuracies": 0.65625, + "rewards/chosen": -4.6354146003723145, + "rewards/margins": 0.7142605781555176, + "rewards/rejected": -5.349674701690674, + "step": 327 + }, + { + "epoch": 0.6934919061777337, + "grad_norm": 8.41653060913086, + "learning_rate": 2.5860389863462763e-07, + "logits/chosen": -0.42244386672973633, + "logits/rejected": -0.3488731384277344, + "logps/chosen": -2.3063669204711914, + "logps/rejected": -2.8124496936798096, + "loss": 1.2621, + "rewards/accuracies": 0.671875, + "rewards/chosen": -4.612733840942383, + "rewards/margins": 1.0121653079986572, + "rewards/rejected": -5.624899387359619, + "step": 328 + }, + { + "epoch": 0.695606210769739, + "grad_norm": 8.558746337890625, + "learning_rate": 2.5536621202221986e-07, + "logits/chosen": -0.4081762433052063, + "logits/rejected": -0.3913821578025818, + "logps/chosen": -2.331026554107666, + "logps/rejected": -2.799482583999634, + "loss": 1.2435, + "rewards/accuracies": 0.59375, + "rewards/chosen": -4.662053108215332, + "rewards/margins": 0.9369123578071594, + "rewards/rejected": -5.598965167999268, + "step": 329 + }, + { + "epoch": 0.6977205153617443, + "grad_norm": 7.550137519836426, + "learning_rate": 2.521419556099754e-07, + "logits/chosen": -0.5334538221359253, + "logits/rejected": -0.5046267509460449, + "logps/chosen": -2.3662197589874268, + "logps/rejected": -2.8178446292877197, + "loss": 1.2172, + "rewards/accuracies": 0.6796875, + "rewards/chosen": -4.7324395179748535, + "rewards/margins": 0.9032500386238098, + "rewards/rejected": -5.6356892585754395, + "step": 330 + }, + { + "epoch": 0.6998348199537496, + "grad_norm": 4.939478397369385, + "learning_rate": 2.4893130640700364e-07, + "logits/chosen": -0.5103824138641357, + "logits/rejected": -0.49076637625694275, + "logps/chosen": -2.0302557945251465, + "logps/rejected": -2.4443471431732178, + "loss": 1.1939, + "rewards/accuracies": 0.65625, + "rewards/chosen": -4.060511589050293, + "rewards/margins": 0.8281831741333008, + "rewards/rejected": -4.8886942863464355, + "step": 331 + }, + { + "epoch": 0.7019491245457549, + "grad_norm": 5.584677219390869, + "learning_rate": 2.4573444067538985e-07, + "logits/chosen": -0.46035417914390564, + "logits/rejected": -0.4546043574810028, + "logps/chosen": -2.1907548904418945, + "logps/rejected": -2.4913454055786133, + "loss": 1.4253, + "rewards/accuracies": 0.6015625, + "rewards/chosen": -4.381509780883789, + "rewards/margins": 0.6011807322502136, + "rewards/rejected": -4.982690811157227, + "step": 332 + }, + { + "epoch": 0.7040634291377602, + "grad_norm": 3.398441791534424, + "learning_rate": 2.425515339205165e-07, + "logits/chosen": -0.5569466352462769, + "logits/rejected": -0.5756793022155762, + "logps/chosen": -2.037411689758301, + "logps/rejected": -2.3700244426727295, + "loss": 1.3425, + "rewards/accuracies": 0.5703125, + "rewards/chosen": -4.074823379516602, + "rewards/margins": 0.6652255654335022, + "rewards/rejected": -4.740048885345459, + "step": 333 + }, + { + "epoch": 0.7061777337297654, + "grad_norm": 8.54529094696045, + "learning_rate": 2.3938276088143e-07, + "logits/chosen": -0.5746757388114929, + "logits/rejected": -0.5874296426773071, + "logps/chosen": -2.1479601860046387, + "logps/rejected": -2.584625244140625, + "loss": 1.2366, + "rewards/accuracies": 0.71875, + "rewards/chosen": -4.295920372009277, + "rewards/margins": 0.8733301758766174, + "rewards/rejected": -5.16925048828125, + "step": 334 + }, + { + "epoch": 0.7082920383217707, + "grad_norm": 5.141815662384033, + "learning_rate": 2.362282955212473e-07, + "logits/chosen": -0.6492913961410522, + "logits/rejected": -0.5812432765960693, + "logps/chosen": -1.9753435850143433, + "logps/rejected": -2.340383768081665, + "loss": 1.2197, + "rewards/accuracies": 0.640625, + "rewards/chosen": -3.9506871700286865, + "rewards/margins": 0.7300805449485779, + "rewards/rejected": -4.68076753616333, + "step": 335 + }, + { + "epoch": 0.710406342913776, + "grad_norm": 5.991698265075684, + "learning_rate": 2.3308831101760483e-07, + "logits/chosen": -0.6887751221656799, + "logits/rejected": -0.6923843622207642, + "logps/chosen": -1.577715277671814, + "logps/rejected": -1.861379623413086, + "loss": 1.2608, + "rewards/accuracies": 0.640625, + "rewards/chosen": -3.155430555343628, + "rewards/margins": 0.5673283338546753, + "rewards/rejected": -3.722759246826172, + "step": 336 + }, + { + "epoch": 0.7125206475057813, + "grad_norm": 1.5719850063323975, + "learning_rate": 2.2996297975315097e-07, + "logits/chosen": -0.6095813512802124, + "logits/rejected": -0.5842909216880798, + "logps/chosen": -1.6973541975021362, + "logps/rejected": -2.1261086463928223, + "loss": 1.2424, + "rewards/accuracies": 0.609375, + "rewards/chosen": -3.3947083950042725, + "rewards/margins": 0.857509195804596, + "rewards/rejected": -4.2522172927856445, + "step": 337 + }, + { + "epoch": 0.7146349520977866, + "grad_norm": 4.785243511199951, + "learning_rate": 2.2685247330608414e-07, + "logits/chosen": -0.7062411308288574, + "logits/rejected": -0.6849475502967834, + "logps/chosen": -1.6659798622131348, + "logps/rejected": -1.980202555656433, + "loss": 1.2512, + "rewards/accuracies": 0.6171875, + "rewards/chosen": -3.3319597244262695, + "rewards/margins": 0.6284454464912415, + "rewards/rejected": -3.960405111312866, + "step": 338 + }, + { + "epoch": 0.7167492566897918, + "grad_norm": 4.3183674812316895, + "learning_rate": 2.2375696244073123e-07, + "logits/chosen": -0.6655697822570801, + "logits/rejected": -0.6642571687698364, + "logps/chosen": -1.615012764930725, + "logps/rejected": -1.9022549390792847, + "loss": 1.398, + "rewards/accuracies": 0.625, + "rewards/chosen": -3.23002552986145, + "rewards/margins": 0.5744845867156982, + "rewards/rejected": -3.8045098781585693, + "step": 339 + }, + { + "epoch": 0.7188635612817972, + "grad_norm": 3.458740472793579, + "learning_rate": 2.2067661709817382e-07, + "logits/chosen": -0.6138105988502502, + "logits/rejected": -0.6241220235824585, + "logps/chosen": -1.5244299173355103, + "logps/rejected": -1.8252849578857422, + "loss": 1.2257, + "rewards/accuracies": 0.5859375, + "rewards/chosen": -3.0488598346710205, + "rewards/margins": 0.6017097234725952, + "rewards/rejected": -3.6505699157714844, + "step": 340 + }, + { + "epoch": 0.7209778658738024, + "grad_norm": 3.3990859985351562, + "learning_rate": 2.1761160638691838e-07, + "logits/chosen": -0.596839964389801, + "logits/rejected": -0.5929630398750305, + "logps/chosen": -1.4333155155181885, + "logps/rejected": -1.820554494857788, + "loss": 1.1124, + "rewards/accuracies": 0.6953125, + "rewards/chosen": -2.866631031036377, + "rewards/margins": 0.7744779586791992, + "rewards/rejected": -3.641108989715576, + "step": 341 + }, + { + "epoch": 0.7230921704658078, + "grad_norm": 2.742016315460205, + "learning_rate": 2.1456209857361246e-07, + "logits/chosen": -0.6483213901519775, + "logits/rejected": -0.6418218612670898, + "logps/chosen": -1.4174959659576416, + "logps/rejected": -1.831233263015747, + "loss": 1.1372, + "rewards/accuracies": 0.703125, + "rewards/chosen": -2.834991931915283, + "rewards/margins": 0.8274745941162109, + "rewards/rejected": -3.662466526031494, + "step": 342 + }, + { + "epoch": 0.725206475057813, + "grad_norm": 2.5489015579223633, + "learning_rate": 2.1152826107380651e-07, + "logits/chosen": -0.599895179271698, + "logits/rejected": -0.6154446005821228, + "logps/chosen": -1.4996072053909302, + "logps/rejected": -1.7961615324020386, + "loss": 1.2288, + "rewards/accuracies": 0.6640625, + "rewards/chosen": -2.9992144107818604, + "rewards/margins": 0.5931087732315063, + "rewards/rejected": -3.592323064804077, + "step": 343 + }, + { + "epoch": 0.7273207796498183, + "grad_norm": 2.8836190700531006, + "learning_rate": 2.0851026044276405e-07, + "logits/chosen": -0.7359989285469055, + "logits/rejected": -0.7111036777496338, + "logps/chosen": -1.32615065574646, + "logps/rejected": -1.6067696809768677, + "loss": 1.2088, + "rewards/accuracies": 0.65625, + "rewards/chosen": -2.65230131149292, + "rewards/margins": 0.5612384080886841, + "rewards/rejected": -3.2135393619537354, + "step": 344 + }, + { + "epoch": 0.7294350842418236, + "grad_norm": 3.1838135719299316, + "learning_rate": 2.0550826236631596e-07, + "logits/chosen": -0.6709272265434265, + "logits/rejected": -0.6708023548126221, + "logps/chosen": -1.2859303951263428, + "logps/rejected": -1.6929675340652466, + "loss": 1.1446, + "rewards/accuracies": 0.6875, + "rewards/chosen": -2.5718607902526855, + "rewards/margins": 0.8140743374824524, + "rewards/rejected": -3.385935068130493, + "step": 345 + }, + { + "epoch": 0.7315493888338289, + "grad_norm": 2.4209675788879395, + "learning_rate": 2.025224316517663e-07, + "logits/chosen": -0.7540403604507446, + "logits/rejected": -0.7601196765899658, + "logps/chosen": -1.3634543418884277, + "logps/rejected": -1.6112797260284424, + "loss": 1.2561, + "rewards/accuracies": 0.625, + "rewards/chosen": -2.7269086837768555, + "rewards/margins": 0.4956510066986084, + "rewards/rejected": -3.2225594520568848, + "step": 346 + }, + { + "epoch": 0.7336636934258342, + "grad_norm": 5.405437469482422, + "learning_rate": 1.9955293221884402e-07, + "logits/chosen": -0.7241419553756714, + "logits/rejected": -0.7224253416061401, + "logps/chosen": -1.2650585174560547, + "logps/rejected": -1.639666199684143, + "loss": 1.1565, + "rewards/accuracies": 0.6875, + "rewards/chosen": -2.5301170349121094, + "rewards/margins": 0.7492151856422424, + "rewards/rejected": -3.279332399368286, + "step": 347 + }, + { + "epoch": 0.7357779980178395, + "grad_norm": 1.5863631963729858, + "learning_rate": 1.9659992709070344e-07, + "logits/chosen": -0.7479431629180908, + "logits/rejected": -0.7219806909561157, + "logps/chosen": -1.294840931892395, + "logps/rejected": -1.6082017421722412, + "loss": 1.1693, + "rewards/accuracies": 0.640625, + "rewards/chosen": -2.58968186378479, + "rewards/margins": 0.6267215013504028, + "rewards/rejected": -3.2164034843444824, + "step": 348 + }, + { + "epoch": 0.7378923026098447, + "grad_norm": 1.7051454782485962, + "learning_rate": 1.936635783849742e-07, + "logits/chosen": -0.6940132975578308, + "logits/rejected": -0.7377297878265381, + "logps/chosen": -1.1897408962249756, + "logps/rejected": -1.631073236465454, + "loss": 1.1069, + "rewards/accuracies": 0.6953125, + "rewards/chosen": -2.379481792449951, + "rewards/margins": 0.8826643228530884, + "rewards/rejected": -3.262146472930908, + "step": 349 + }, + { + "epoch": 0.74000660720185, + "grad_norm": 2.704514980316162, + "learning_rate": 1.907440473048626e-07, + "logits/chosen": -0.6926394104957581, + "logits/rejected": -0.7064180374145508, + "logps/chosen": -1.1691362857818604, + "logps/rejected": -1.511006236076355, + "loss": 1.1541, + "rewards/accuracies": 0.6640625, + "rewards/chosen": -2.3382725715637207, + "rewards/margins": 0.6837398409843445, + "rewards/rejected": -3.02201247215271, + "step": 350 + }, + { + "epoch": 0.7421209117938553, + "grad_norm": 2.3685505390167236, + "learning_rate": 1.8784149413030004e-07, + "logits/chosen": -0.7785338759422302, + "logits/rejected": -0.7802280187606812, + "logps/chosen": -1.267012119293213, + "logps/rejected": -1.5235991477966309, + "loss": 1.177, + "rewards/accuracies": 0.65625, + "rewards/chosen": -2.534024238586426, + "rewards/margins": 0.5131738781929016, + "rewards/rejected": -3.0471982955932617, + "step": 351 + }, + { + "epoch": 0.7442352163858605, + "grad_norm": 2.8642280101776123, + "learning_rate": 1.849560782091445e-07, + "logits/chosen": -0.8269493579864502, + "logits/rejected": -0.8431333899497986, + "logps/chosen": -1.228893518447876, + "logps/rejected": -1.5784943103790283, + "loss": 1.1764, + "rewards/accuracies": 0.640625, + "rewards/chosen": -2.457787036895752, + "rewards/margins": 0.6992017030715942, + "rewards/rejected": -3.1569886207580566, + "step": 352 + }, + { + "epoch": 0.7463495209778659, + "grad_norm": 4.742166996002197, + "learning_rate": 1.8208795794843246e-07, + "logits/chosen": -0.764488160610199, + "logits/rejected": -0.7553139925003052, + "logps/chosen": -1.3095338344573975, + "logps/rejected": -1.6771752834320068, + "loss": 1.0957, + "rewards/accuracies": 0.7109375, + "rewards/chosen": -2.619067668914795, + "rewards/margins": 0.7352830171585083, + "rewards/rejected": -3.3543505668640137, + "step": 353 + }, + { + "epoch": 0.7484638255698711, + "grad_norm": 3.543769359588623, + "learning_rate": 1.7923729080568239e-07, + "logits/chosen": -0.7355642914772034, + "logits/rejected": -0.7744429707527161, + "logps/chosen": -1.3419017791748047, + "logps/rejected": -1.591749668121338, + "loss": 1.2579, + "rewards/accuracies": 0.6484375, + "rewards/chosen": -2.6838035583496094, + "rewards/margins": 0.4996955990791321, + "rewards/rejected": -3.183499336242676, + "step": 354 + }, + { + "epoch": 0.7505781301618765, + "grad_norm": 4.187947750091553, + "learning_rate": 1.764042332802506e-07, + "logits/chosen": -0.7009099721908569, + "logits/rejected": -0.6947562098503113, + "logps/chosen": -1.3167665004730225, + "logps/rejected": -1.640596866607666, + "loss": 1.2269, + "rewards/accuracies": 0.6171875, + "rewards/chosen": -2.633533000946045, + "rewards/margins": 0.6476608514785767, + "rewards/rejected": -3.281193733215332, + "step": 355 + }, + { + "epoch": 0.7526924347538817, + "grad_norm": 1.7813458442687988, + "learning_rate": 1.7358894090473924e-07, + "logits/chosen": -0.7276792526245117, + "logits/rejected": -0.7536065578460693, + "logps/chosen": -1.401429295539856, + "logps/rejected": -1.7458314895629883, + "loss": 1.1934, + "rewards/accuracies": 0.6875, + "rewards/chosen": -2.802858591079712, + "rewards/margins": 0.6888045072555542, + "rewards/rejected": -3.4916629791259766, + "step": 356 + }, + { + "epoch": 0.754806739345887, + "grad_norm": 2.3192296028137207, + "learning_rate": 1.7079156823645801e-07, + "logits/chosen": -0.6756848096847534, + "logits/rejected": -0.6988381743431091, + "logps/chosen": -1.36654531955719, + "logps/rejected": -1.6672351360321045, + "loss": 1.1928, + "rewards/accuracies": 0.671875, + "rewards/chosen": -2.73309063911438, + "rewards/margins": 0.6013798117637634, + "rewards/rejected": -3.334470272064209, + "step": 357 + }, + { + "epoch": 0.7569210439378923, + "grad_norm": 2.7722420692443848, + "learning_rate": 1.6801226884893893e-07, + "logits/chosen": -0.6857397556304932, + "logits/rejected": -0.7169467806816101, + "logps/chosen": -1.4047114849090576, + "logps/rejected": -1.733205795288086, + "loss": 1.16, + "rewards/accuracies": 0.6796875, + "rewards/chosen": -2.8094229698181152, + "rewards/margins": 0.6569885015487671, + "rewards/rejected": -3.466411590576172, + "step": 358 + }, + { + "epoch": 0.7590353485298976, + "grad_norm": 6.300495624542236, + "learning_rate": 1.6525119532350506e-07, + "logits/chosen": -0.7457281947135925, + "logits/rejected": -0.7319377660751343, + "logps/chosen": -1.282365083694458, + "logps/rejected": -1.6675825119018555, + "loss": 1.0742, + "rewards/accuracies": 0.7265625, + "rewards/chosen": -2.564730167388916, + "rewards/margins": 0.7704350352287292, + "rewards/rejected": -3.335165023803711, + "step": 359 + }, + { + "epoch": 0.7611496531219029, + "grad_norm": 3.5068228244781494, + "learning_rate": 1.6250849924089482e-07, + "logits/chosen": -0.7112680077552795, + "logits/rejected": -0.7166794538497925, + "logps/chosen": -1.3996254205703735, + "logps/rejected": -1.6635833978652954, + "loss": 1.2438, + "rewards/accuracies": 0.625, + "rewards/chosen": -2.799250841140747, + "rewards/margins": 0.5279159545898438, + "rewards/rejected": -3.327166795730591, + "step": 360 + }, + { + "epoch": 0.7632639577139082, + "grad_norm": 1.421538233757019, + "learning_rate": 1.5978433117293883e-07, + "logits/chosen": -0.7009663581848145, + "logits/rejected": -0.6878695487976074, + "logps/chosen": -1.4174691438674927, + "logps/rejected": -1.802457332611084, + "loss": 1.0885, + "rewards/accuracies": 0.7265625, + "rewards/chosen": -2.8349382877349854, + "rewards/margins": 0.7699761986732483, + "rewards/rejected": -3.604914665222168, + "step": 361 + }, + { + "epoch": 0.7653782623059134, + "grad_norm": 3.2645766735076904, + "learning_rate": 1.5707884067429471e-07, + "logits/chosen": -0.6865817904472351, + "logits/rejected": -0.7084690928459167, + "logps/chosen": -1.377517819404602, + "logps/rejected": -1.7079989910125732, + "loss": 1.2371, + "rewards/accuracies": 0.640625, + "rewards/chosen": -2.755035638809204, + "rewards/margins": 0.660962700843811, + "rewards/rejected": -3.4159979820251465, + "step": 362 + }, + { + "epoch": 0.7674925668979188, + "grad_norm": 1.973783254623413, + "learning_rate": 1.5439217627423695e-07, + "logits/chosen": -0.7317283153533936, + "logits/rejected": -0.7571225166320801, + "logps/chosen": -1.63040030002594, + "logps/rejected": -2.027442216873169, + "loss": 1.1614, + "rewards/accuracies": 0.671875, + "rewards/chosen": -3.26080060005188, + "rewards/margins": 0.7940834760665894, + "rewards/rejected": -4.054884433746338, + "step": 363 + }, + { + "epoch": 0.769606871489924, + "grad_norm": 4.545448303222656, + "learning_rate": 1.5172448546850163e-07, + "logits/chosen": -0.6746503710746765, + "logits/rejected": -0.7073549628257751, + "logps/chosen": -1.321073055267334, + "logps/rejected": -1.6741642951965332, + "loss": 1.1609, + "rewards/accuracies": 0.6796875, + "rewards/chosen": -2.642146110534668, + "rewards/margins": 0.7061826586723328, + "rewards/rejected": -3.3483285903930664, + "step": 364 + }, + { + "epoch": 0.7717211760819292, + "grad_norm": 8.678997039794922, + "learning_rate": 1.490759147111894e-07, + "logits/chosen": -0.6089351773262024, + "logits/rejected": -0.6172072291374207, + "logps/chosen": -1.6598318815231323, + "logps/rejected": -1.9151239395141602, + "loss": 1.2762, + "rewards/accuracies": 0.6484375, + "rewards/chosen": -3.3196637630462646, + "rewards/margins": 0.5105838775634766, + "rewards/rejected": -3.8302478790283203, + "step": 365 + }, + { + "epoch": 0.7738354806739346, + "grad_norm": 3.29367733001709, + "learning_rate": 1.4644660940672627e-07, + "logits/chosen": -0.6255152821540833, + "logits/rejected": -0.6178345680236816, + "logps/chosen": -1.7635339498519897, + "logps/rejected": -2.02409029006958, + "loss": 1.4469, + "rewards/accuracies": 0.6171875, + "rewards/chosen": -3.5270678997039795, + "rewards/margins": 0.5211121439933777, + "rewards/rejected": -4.04818058013916, + "step": 366 + }, + { + "epoch": 0.7759497852659398, + "grad_norm": 3.500715732574463, + "learning_rate": 1.438367139018796e-07, + "logits/chosen": -0.6738446354866028, + "logits/rejected": -0.671849250793457, + "logps/chosen": -1.603959560394287, + "logps/rejected": -2.140998363494873, + "loss": 0.9771, + "rewards/accuracies": 0.703125, + "rewards/chosen": -3.207919120788574, + "rewards/margins": 1.0740783214569092, + "rewards/rejected": -4.281996726989746, + "step": 367 + }, + { + "epoch": 0.7780640898579452, + "grad_norm": 2.8842501640319824, + "learning_rate": 1.412463714778343e-07, + "logits/chosen": -0.6544129252433777, + "logits/rejected": -0.6667245030403137, + "logps/chosen": -1.7409751415252686, + "logps/rejected": -2.1441538333892822, + "loss": 1.1043, + "rewards/accuracies": 0.7265625, + "rewards/chosen": -3.481950283050537, + "rewards/margins": 0.806357741355896, + "rewards/rejected": -4.2883076667785645, + "step": 368 + }, + { + "epoch": 0.7801783944499504, + "grad_norm": 3.7606077194213867, + "learning_rate": 1.3867572434232728e-07, + "logits/chosen": -0.6620441675186157, + "logits/rejected": -0.6536539793014526, + "logps/chosen": -1.6755543947219849, + "logps/rejected": -2.012425184249878, + "loss": 1.2249, + "rewards/accuracies": 0.6796875, + "rewards/chosen": -3.3511087894439697, + "rewards/margins": 0.6737421154975891, + "rewards/rejected": -4.024850368499756, + "step": 369 + }, + { + "epoch": 0.7822926990419558, + "grad_norm": 3.284456729888916, + "learning_rate": 1.3612491362183887e-07, + "logits/chosen": -0.6353476047515869, + "logits/rejected": -0.6363587975502014, + "logps/chosen": -1.6001538038253784, + "logps/rejected": -2.0670526027679443, + "loss": 1.0746, + "rewards/accuracies": 0.6796875, + "rewards/chosen": -3.200307607650757, + "rewards/margins": 0.9337971210479736, + "rewards/rejected": -4.134105205535889, + "step": 370 + }, + { + "epoch": 0.784407003633961, + "grad_norm": 1.9063444137573242, + "learning_rate": 1.3359407935384642e-07, + "logits/chosen": -0.6120063662528992, + "logits/rejected": -0.5794797539710999, + "logps/chosen": -1.4489734172821045, + "logps/rejected": -1.9216854572296143, + "loss": 1.0928, + "rewards/accuracies": 0.71875, + "rewards/chosen": -2.897946834564209, + "rewards/margins": 0.9454240798950195, + "rewards/rejected": -3.8433709144592285, + "step": 371 + }, + { + "epoch": 0.7865213082259663, + "grad_norm": 6.973724842071533, + "learning_rate": 1.3108336047913633e-07, + "logits/chosen": -0.6082984209060669, + "logits/rejected": -0.6162828207015991, + "logps/chosen": -1.7623229026794434, + "logps/rejected": -2.239130735397339, + "loss": 1.2665, + "rewards/accuracies": 0.59375, + "rewards/chosen": -3.5246458053588867, + "rewards/margins": 0.9536150693893433, + "rewards/rejected": -4.478261470794678, + "step": 372 + }, + { + "epoch": 0.7886356128179716, + "grad_norm": 3.874128580093384, + "learning_rate": 1.2859289483417557e-07, + "logits/chosen": -0.5540960431098938, + "logits/rejected": -0.5091680884361267, + "logps/chosen": -1.85587739944458, + "logps/rejected": -2.3959312438964844, + "loss": 1.0672, + "rewards/accuracies": 0.6796875, + "rewards/chosen": -3.71175479888916, + "rewards/margins": 1.0801074504852295, + "rewards/rejected": -4.791862487792969, + "step": 373 + }, + { + "epoch": 0.7907499174099769, + "grad_norm": 13.771154403686523, + "learning_rate": 1.261228191435445e-07, + "logits/chosen": -0.599963903427124, + "logits/rejected": -0.5765703916549683, + "logps/chosen": -1.7974251508712769, + "logps/rejected": -2.2272088527679443, + "loss": 1.1994, + "rewards/accuracies": 0.640625, + "rewards/chosen": -3.5948503017425537, + "rewards/margins": 0.8595672249794006, + "rewards/rejected": -4.454417705535889, + "step": 374 + }, + { + "epoch": 0.7928642220019821, + "grad_norm": 2.5084969997406006, + "learning_rate": 1.2367326901243214e-07, + "logits/chosen": -0.5945304036140442, + "logits/rejected": -0.6021737456321716, + "logps/chosen": -1.9855573177337646, + "logps/rejected": -2.3953022956848145, + "loss": 1.2576, + "rewards/accuracies": 0.6640625, + "rewards/chosen": -3.9711146354675293, + "rewards/margins": 0.8194906711578369, + "rewards/rejected": -4.790604591369629, + "step": 375 + }, + { + "epoch": 0.7949785265939875, + "grad_norm": 4.571497440338135, + "learning_rate": 1.2124437891918993e-07, + "logits/chosen": -0.5888144373893738, + "logits/rejected": -0.5575076937675476, + "logps/chosen": -1.8334908485412598, + "logps/rejected": -2.153212070465088, + "loss": 1.2104, + "rewards/accuracies": 0.640625, + "rewards/chosen": -3.6669816970825195, + "rewards/margins": 0.639442503452301, + "rewards/rejected": -4.306424140930176, + "step": 376 + }, + { + "epoch": 0.7970928311859927, + "grad_norm": 5.023235321044922, + "learning_rate": 1.1883628220795005e-07, + "logits/chosen": -0.632038414478302, + "logits/rejected": -0.6368271708488464, + "logps/chosen": -1.8573570251464844, + "logps/rejected": -2.291320323944092, + "loss": 1.1719, + "rewards/accuracies": 0.6640625, + "rewards/chosen": -3.7147140502929688, + "rewards/margins": 0.8679270148277283, + "rewards/rejected": -4.582640647888184, + "step": 377 + }, + { + "epoch": 0.7992071357779981, + "grad_norm": 4.98567533493042, + "learning_rate": 1.1644911108130434e-07, + "logits/chosen": -0.5647228360176086, + "logits/rejected": -0.5541558265686035, + "logps/chosen": -1.8232372999191284, + "logps/rejected": -2.2992348670959473, + "loss": 1.1476, + "rewards/accuracies": 0.6953125, + "rewards/chosen": -3.646474599838257, + "rewards/margins": 0.9519950747489929, + "rewards/rejected": -4.5984697341918945, + "step": 378 + }, + { + "epoch": 0.8013214403700033, + "grad_norm": 9.514540672302246, + "learning_rate": 1.1408299659304682e-07, + "logits/chosen": -0.5385195016860962, + "logits/rejected": -0.5475942492485046, + "logps/chosen": -2.077877998352051, + "logps/rejected": -2.4877052307128906, + "loss": 1.1605, + "rewards/accuracies": 0.7265625, + "rewards/chosen": -4.155755996704102, + "rewards/margins": 0.8196545243263245, + "rewards/rejected": -4.975410461425781, + "step": 379 + }, + { + "epoch": 0.8034357449620085, + "grad_norm": 7.652558326721191, + "learning_rate": 1.1173806864097884e-07, + "logits/chosen": -0.5651392936706543, + "logits/rejected": -0.5097556114196777, + "logps/chosen": -1.9452521800994873, + "logps/rejected": -2.376047134399414, + "loss": 1.2004, + "rewards/accuracies": 0.6796875, + "rewards/chosen": -3.8905043601989746, + "rewards/margins": 0.8615895509719849, + "rewards/rejected": -4.752094268798828, + "step": 380 + }, + { + "epoch": 0.8055500495540139, + "grad_norm": 6.184218406677246, + "learning_rate": 1.0941445595977766e-07, + "logits/chosen": -0.5738644599914551, + "logits/rejected": -0.570101797580719, + "logps/chosen": -2.0233359336853027, + "logps/rejected": -2.5829384326934814, + "loss": 1.1539, + "rewards/accuracies": 0.71875, + "rewards/chosen": -4.0466718673706055, + "rewards/margins": 1.1192048788070679, + "rewards/rejected": -5.165876865386963, + "step": 381 + }, + { + "epoch": 0.8076643541460191, + "grad_norm": 4.697547435760498, + "learning_rate": 1.0711228611392936e-07, + "logits/chosen": -0.5766915082931519, + "logits/rejected": -0.5619411468505859, + "logps/chosen": -2.0546395778656006, + "logps/rejected": -2.4459054470062256, + "loss": 1.2723, + "rewards/accuracies": 0.59375, + "rewards/chosen": -4.109279155731201, + "rewards/margins": 0.7825320959091187, + "rewards/rejected": -4.891810894012451, + "step": 382 + }, + { + "epoch": 0.8097786587380245, + "grad_norm": 5.595128536224365, + "learning_rate": 1.0483168549072518e-07, + "logits/chosen": -0.6808648109436035, + "logits/rejected": -0.6518751382827759, + "logps/chosen": -1.9909974336624146, + "logps/rejected": -2.3775596618652344, + "loss": 1.2501, + "rewards/accuracies": 0.6640625, + "rewards/chosen": -3.981994867324829, + "rewards/margins": 0.7731241583824158, + "rewards/rejected": -4.755119323730469, + "step": 383 + }, + { + "epoch": 0.8118929633300297, + "grad_norm": 3.6460607051849365, + "learning_rate": 1.0257277929332331e-07, + "logits/chosen": -0.6901826858520508, + "logits/rejected": -0.703309953212738, + "logps/chosen": -1.9317903518676758, + "logps/rejected": -2.322279930114746, + "loss": 1.1945, + "rewards/accuracies": 0.65625, + "rewards/chosen": -3.8635807037353516, + "rewards/margins": 0.780979335308075, + "rewards/rejected": -4.644559860229492, + "step": 384 + } + ], + "logging_steps": 1, + "max_steps": 472, + "num_input_tokens_seen": 0, + "num_train_epochs": 1, + "save_steps": 64, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 0.0, + "train_batch_size": 2, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-448/config.json b/checkpoint-448/config.json new file mode 100644 index 0000000000000000000000000000000000000000..ead276a9e0bbd4a0baae360715d8e80853974eb6 --- /dev/null +++ b/checkpoint-448/config.json @@ -0,0 +1,40 @@ +{ + "_name_or_path": "/home/v-liuzichuan/huggingface/Llama-3.2-3B-Instruct", + "architectures": [ + "LlamaForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "bos_token_id": 128000, + "eos_token_id": [ + 128001, + 128008, + 128009 + ], + "head_dim": 128, + "hidden_act": "silu", + "hidden_size": 3072, + "initializer_range": 0.02, + "intermediate_size": 8192, + "max_position_embeddings": 131072, + "mlp_bias": false, + "model_type": "llama", + "num_attention_heads": 24, + "num_hidden_layers": 28, + "num_key_value_heads": 8, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": { + "factor": 32.0, + "high_freq_factor": 4.0, + "low_freq_factor": 1.0, + "original_max_position_embeddings": 8192, + "rope_type": "llama3" + }, + "rope_theta": 500000.0, + "tie_word_embeddings": true, + "torch_dtype": "bfloat16", + "transformers_version": "4.45.2", + "use_cache": false, + "vocab_size": 128256 +} diff --git a/checkpoint-448/generation_config.json b/checkpoint-448/generation_config.json new file mode 100644 index 0000000000000000000000000000000000000000..b18d16b4cc3c799dd78e854fd824ddf8a2996285 --- /dev/null +++ b/checkpoint-448/generation_config.json @@ -0,0 +1,12 @@ +{ + "bos_token_id": 128000, + "do_sample": true, + "eos_token_id": [ + 128001, + 128008, + 128009 + ], + "temperature": 0.6, + "top_p": 0.9, + "transformers_version": "4.45.2" +} diff --git a/checkpoint-448/model-00001-of-00002.safetensors b/checkpoint-448/model-00001-of-00002.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..c56a70b15cd470db268d47abc94718a555a74a37 --- /dev/null +++ b/checkpoint-448/model-00001-of-00002.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c3744f7468b13a3b49f3bf3bcd6a6b546de592a7e834cb31a33eb19fb57966a0 +size 4965799096 diff --git a/checkpoint-448/model-00002-of-00002.safetensors b/checkpoint-448/model-00002-of-00002.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..074e86e53575b4170f216ef876d1fa065c68c41a --- /dev/null +++ b/checkpoint-448/model-00002-of-00002.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2e01589f974c8356339bd3b6d500aa9eb3887946e7d8ed96a48409c16f1c0243 +size 2247734992 diff --git a/checkpoint-448/model.safetensors.index.json b/checkpoint-448/model.safetensors.index.json new file mode 100644 index 0000000000000000000000000000000000000000..ed64de846d720b9a7859dc20575fea8e8ca51940 --- /dev/null +++ b/checkpoint-448/model.safetensors.index.json @@ -0,0 +1,262 @@ +{ + "metadata": { + "total_size": 7213504512 + }, + "weight_map": { + "lm_head.weight": "model-00002-of-00002.safetensors", + "model.embed_tokens.weight": "model-00001-of-00002.safetensors", + "model.layers.0.input_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.0.mlp.down_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.0.mlp.gate_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.0.mlp.up_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.0.post_attention_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.0.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.0.self_attn.o_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.0.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.0.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.1.input_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.1.mlp.down_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.1.mlp.gate_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.1.mlp.up_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.1.post_attention_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.1.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.1.self_attn.o_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.1.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.1.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.10.input_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.10.mlp.down_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.10.mlp.gate_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.10.mlp.up_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.10.post_attention_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.10.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.10.self_attn.o_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.10.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.10.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.11.input_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.11.mlp.down_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.11.mlp.gate_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.11.mlp.up_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.11.post_attention_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.11.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.11.self_attn.o_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.11.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.11.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.12.input_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.12.mlp.down_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.12.mlp.gate_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.12.mlp.up_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.12.post_attention_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.12.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.12.self_attn.o_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.12.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.12.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.13.input_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.13.mlp.down_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.13.mlp.gate_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.13.mlp.up_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.13.post_attention_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.13.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.13.self_attn.o_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.13.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.13.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.14.input_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.14.mlp.down_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.14.mlp.gate_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.14.mlp.up_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.14.post_attention_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.14.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.14.self_attn.o_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.14.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.14.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.15.input_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.15.mlp.down_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.15.mlp.gate_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.15.mlp.up_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.15.post_attention_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.15.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.15.self_attn.o_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.15.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.15.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.16.input_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.16.mlp.down_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.16.mlp.gate_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.16.mlp.up_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.16.post_attention_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.16.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.16.self_attn.o_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.16.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.16.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.17.input_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.17.mlp.down_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.17.mlp.gate_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.17.mlp.up_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.17.post_attention_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.17.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.17.self_attn.o_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.17.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.17.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.18.input_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.18.mlp.down_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.18.mlp.gate_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.18.mlp.up_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.18.post_attention_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.18.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.18.self_attn.o_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.18.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.18.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.19.input_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.19.mlp.down_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.19.mlp.gate_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.19.mlp.up_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.19.post_attention_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.19.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.19.self_attn.o_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.19.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.19.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.2.input_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.2.mlp.down_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.2.mlp.gate_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.2.mlp.up_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.2.post_attention_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.2.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.2.self_attn.o_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.2.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.2.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.20.input_layernorm.weight": "model-00002-of-00002.safetensors", + "model.layers.20.mlp.down_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.20.mlp.gate_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.20.mlp.up_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.20.post_attention_layernorm.weight": "model-00002-of-00002.safetensors", + "model.layers.20.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.20.self_attn.o_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.20.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.20.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.21.input_layernorm.weight": "model-00002-of-00002.safetensors", + "model.layers.21.mlp.down_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.21.mlp.gate_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.21.mlp.up_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.21.post_attention_layernorm.weight": "model-00002-of-00002.safetensors", + "model.layers.21.self_attn.k_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.21.self_attn.o_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.21.self_attn.q_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.21.self_attn.v_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.22.input_layernorm.weight": "model-00002-of-00002.safetensors", + "model.layers.22.mlp.down_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.22.mlp.gate_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.22.mlp.up_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.22.post_attention_layernorm.weight": "model-00002-of-00002.safetensors", + "model.layers.22.self_attn.k_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.22.self_attn.o_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.22.self_attn.q_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.22.self_attn.v_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.23.input_layernorm.weight": "model-00002-of-00002.safetensors", + "model.layers.23.mlp.down_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.23.mlp.gate_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.23.mlp.up_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.23.post_attention_layernorm.weight": "model-00002-of-00002.safetensors", + "model.layers.23.self_attn.k_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.23.self_attn.o_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.23.self_attn.q_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.23.self_attn.v_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.24.input_layernorm.weight": "model-00002-of-00002.safetensors", + "model.layers.24.mlp.down_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.24.mlp.gate_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.24.mlp.up_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.24.post_attention_layernorm.weight": "model-00002-of-00002.safetensors", + "model.layers.24.self_attn.k_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.24.self_attn.o_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.24.self_attn.q_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.24.self_attn.v_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.25.input_layernorm.weight": "model-00002-of-00002.safetensors", + "model.layers.25.mlp.down_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.25.mlp.gate_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.25.mlp.up_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.25.post_attention_layernorm.weight": "model-00002-of-00002.safetensors", + "model.layers.25.self_attn.k_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.25.self_attn.o_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.25.self_attn.q_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.25.self_attn.v_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.26.input_layernorm.weight": "model-00002-of-00002.safetensors", + "model.layers.26.mlp.down_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.26.mlp.gate_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.26.mlp.up_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.26.post_attention_layernorm.weight": "model-00002-of-00002.safetensors", + "model.layers.26.self_attn.k_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.26.self_attn.o_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.26.self_attn.q_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.26.self_attn.v_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.27.input_layernorm.weight": "model-00002-of-00002.safetensors", + "model.layers.27.mlp.down_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.27.mlp.gate_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.27.mlp.up_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.27.post_attention_layernorm.weight": "model-00002-of-00002.safetensors", + "model.layers.27.self_attn.k_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.27.self_attn.o_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.27.self_attn.q_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.27.self_attn.v_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.3.input_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.3.mlp.down_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.3.mlp.gate_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.3.mlp.up_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.3.post_attention_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.3.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.3.self_attn.o_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.3.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.3.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.4.input_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.4.mlp.down_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.4.mlp.gate_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.4.mlp.up_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.4.post_attention_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.4.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.4.self_attn.o_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.4.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.4.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.5.input_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.5.mlp.down_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.5.mlp.gate_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.5.mlp.up_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.5.post_attention_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.5.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.5.self_attn.o_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.5.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.5.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.6.input_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.6.mlp.down_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.6.mlp.gate_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.6.mlp.up_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.6.post_attention_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.6.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.6.self_attn.o_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.6.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.6.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.7.input_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.7.mlp.down_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.7.mlp.gate_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.7.mlp.up_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.7.post_attention_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.7.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.7.self_attn.o_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.7.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.7.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.8.input_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.8.mlp.down_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.8.mlp.gate_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.8.mlp.up_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.8.post_attention_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.8.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.8.self_attn.o_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.8.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.8.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.9.input_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.9.mlp.down_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.9.mlp.gate_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.9.mlp.up_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.9.post_attention_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.9.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.9.self_attn.o_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.9.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.9.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.norm.weight": "model-00002-of-00002.safetensors" + } +} diff --git a/checkpoint-448/special_tokens_map.json b/checkpoint-448/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..b43be96621d147110fb8a18b5776ec6e38516127 --- /dev/null +++ b/checkpoint-448/special_tokens_map.json @@ -0,0 +1,17 @@ +{ + "bos_token": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": "<|eot_id|>" +} diff --git a/checkpoint-448/tokenizer.json b/checkpoint-448/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..1c1d8d5c9024994f1d3b00f9662b8dd89ca13cf2 --- /dev/null +++ b/checkpoint-448/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6b9e4e7fb171f92fd137b777cc2714bf87d11576700a1dcd7a399e7bbe39537b +size 17209920 diff --git a/checkpoint-448/tokenizer_config.json b/checkpoint-448/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..e3cdb7bfcc59fd293867d98e124a30b3ddf39c93 --- /dev/null +++ b/checkpoint-448/tokenizer_config.json @@ -0,0 +1,2063 @@ +{ + "added_tokens_decoder": { + "128000": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128001": { + "content": "<|end_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128002": { + "content": "<|reserved_special_token_0|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128003": { + "content": "<|reserved_special_token_1|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128004": { + "content": "<|finetune_right_pad_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128005": { + "content": "<|reserved_special_token_2|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128006": { + "content": "<|start_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128007": { + "content": "<|end_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128008": { + "content": "<|eom_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128009": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128010": { + "content": "<|python_tag|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128011": { + "content": "<|reserved_special_token_3|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128012": { + "content": "<|reserved_special_token_4|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128013": { + "content": "<|reserved_special_token_5|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128014": { + "content": "<|reserved_special_token_6|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128015": { + "content": "<|reserved_special_token_7|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128016": { + "content": "<|reserved_special_token_8|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128017": { + "content": "<|reserved_special_token_9|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128018": { + "content": "<|reserved_special_token_10|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128019": { + "content": "<|reserved_special_token_11|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128020": { + "content": "<|reserved_special_token_12|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128021": { + "content": "<|reserved_special_token_13|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128022": { + "content": "<|reserved_special_token_14|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128023": { + "content": "<|reserved_special_token_15|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128024": { + "content": "<|reserved_special_token_16|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128025": { + "content": "<|reserved_special_token_17|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128026": { + "content": "<|reserved_special_token_18|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128027": { + "content": "<|reserved_special_token_19|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128028": { + "content": "<|reserved_special_token_20|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128029": { + "content": "<|reserved_special_token_21|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128030": { + "content": "<|reserved_special_token_22|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128031": { + "content": "<|reserved_special_token_23|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128032": { + "content": "<|reserved_special_token_24|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128033": { + "content": "<|reserved_special_token_25|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128034": { + "content": "<|reserved_special_token_26|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128035": { + "content": "<|reserved_special_token_27|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128036": { + "content": "<|reserved_special_token_28|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128037": { + "content": "<|reserved_special_token_29|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128038": { + "content": "<|reserved_special_token_30|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128039": { + "content": "<|reserved_special_token_31|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128040": { + "content": "<|reserved_special_token_32|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128041": { + "content": "<|reserved_special_token_33|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128042": { + "content": "<|reserved_special_token_34|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128043": { + "content": "<|reserved_special_token_35|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128044": { + "content": "<|reserved_special_token_36|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128045": { + "content": "<|reserved_special_token_37|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128046": { + "content": "<|reserved_special_token_38|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128047": { + "content": "<|reserved_special_token_39|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128048": { + "content": "<|reserved_special_token_40|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128049": { + "content": "<|reserved_special_token_41|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128050": { + "content": "<|reserved_special_token_42|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128051": { + "content": "<|reserved_special_token_43|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128052": { + "content": "<|reserved_special_token_44|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128053": { + "content": "<|reserved_special_token_45|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128054": { + "content": "<|reserved_special_token_46|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128055": { + "content": "<|reserved_special_token_47|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128056": { + "content": "<|reserved_special_token_48|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128057": { + "content": "<|reserved_special_token_49|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128058": { + "content": "<|reserved_special_token_50|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128059": { + "content": "<|reserved_special_token_51|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128060": { + "content": "<|reserved_special_token_52|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128061": { + "content": "<|reserved_special_token_53|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128062": { + "content": "<|reserved_special_token_54|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128063": { + "content": "<|reserved_special_token_55|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128064": { + "content": "<|reserved_special_token_56|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128065": { + "content": "<|reserved_special_token_57|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128066": { + "content": "<|reserved_special_token_58|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128067": { + "content": "<|reserved_special_token_59|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128068": { + "content": "<|reserved_special_token_60|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128069": { + "content": "<|reserved_special_token_61|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128070": { + "content": "<|reserved_special_token_62|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128071": { + "content": "<|reserved_special_token_63|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128072": { + "content": "<|reserved_special_token_64|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128073": { + "content": "<|reserved_special_token_65|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128074": { + "content": "<|reserved_special_token_66|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128075": { + "content": "<|reserved_special_token_67|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128076": { + "content": "<|reserved_special_token_68|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128077": { + "content": "<|reserved_special_token_69|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128078": { + "content": "<|reserved_special_token_70|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128079": { + "content": "<|reserved_special_token_71|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128080": { + "content": "<|reserved_special_token_72|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128081": { + "content": "<|reserved_special_token_73|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128082": { + "content": "<|reserved_special_token_74|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128083": { + "content": "<|reserved_special_token_75|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128084": { + "content": "<|reserved_special_token_76|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128085": { + "content": "<|reserved_special_token_77|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128086": { + "content": "<|reserved_special_token_78|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128087": { + "content": "<|reserved_special_token_79|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128088": { + "content": "<|reserved_special_token_80|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128089": { + "content": "<|reserved_special_token_81|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128090": { + "content": "<|reserved_special_token_82|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128091": { + "content": "<|reserved_special_token_83|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128092": { + "content": "<|reserved_special_token_84|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128093": { + "content": "<|reserved_special_token_85|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128094": { + "content": "<|reserved_special_token_86|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128095": { + "content": "<|reserved_special_token_87|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128096": { + "content": "<|reserved_special_token_88|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128097": { + "content": "<|reserved_special_token_89|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128098": { + "content": "<|reserved_special_token_90|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128099": { + "content": "<|reserved_special_token_91|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128100": { + "content": "<|reserved_special_token_92|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128101": { + "content": "<|reserved_special_token_93|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128102": { + "content": "<|reserved_special_token_94|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128103": { + "content": "<|reserved_special_token_95|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128104": { + "content": "<|reserved_special_token_96|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128105": { + "content": "<|reserved_special_token_97|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128106": { + "content": "<|reserved_special_token_98|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128107": { + "content": "<|reserved_special_token_99|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128108": { + "content": "<|reserved_special_token_100|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128109": { + "content": "<|reserved_special_token_101|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128110": { + "content": "<|reserved_special_token_102|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128111": { + "content": "<|reserved_special_token_103|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128112": { + "content": "<|reserved_special_token_104|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128113": { + "content": "<|reserved_special_token_105|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128114": { + "content": "<|reserved_special_token_106|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128115": { + "content": "<|reserved_special_token_107|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128116": { + "content": "<|reserved_special_token_108|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128117": { + "content": "<|reserved_special_token_109|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128118": { + "content": "<|reserved_special_token_110|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128119": { + "content": "<|reserved_special_token_111|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128120": { + "content": "<|reserved_special_token_112|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128121": { + "content": "<|reserved_special_token_113|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128122": { + "content": "<|reserved_special_token_114|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128123": { + "content": "<|reserved_special_token_115|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128124": { + "content": "<|reserved_special_token_116|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128125": { + "content": "<|reserved_special_token_117|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128126": { + "content": "<|reserved_special_token_118|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128127": { + "content": "<|reserved_special_token_119|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128128": { + "content": "<|reserved_special_token_120|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128129": { + "content": "<|reserved_special_token_121|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128130": { + "content": "<|reserved_special_token_122|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128131": { + "content": "<|reserved_special_token_123|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128132": { + "content": "<|reserved_special_token_124|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128133": { + "content": "<|reserved_special_token_125|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128134": { + "content": "<|reserved_special_token_126|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128135": { + "content": "<|reserved_special_token_127|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128136": { + "content": "<|reserved_special_token_128|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128137": { + "content": "<|reserved_special_token_129|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128138": { + "content": "<|reserved_special_token_130|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128139": { + "content": "<|reserved_special_token_131|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128140": { + "content": "<|reserved_special_token_132|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128141": { + "content": "<|reserved_special_token_133|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128142": { + "content": "<|reserved_special_token_134|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128143": { + "content": "<|reserved_special_token_135|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128144": { + "content": "<|reserved_special_token_136|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128145": { + "content": "<|reserved_special_token_137|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128146": { + "content": "<|reserved_special_token_138|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128147": { + "content": "<|reserved_special_token_139|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128148": { + "content": "<|reserved_special_token_140|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128149": { + "content": "<|reserved_special_token_141|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128150": { + "content": "<|reserved_special_token_142|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128151": { + "content": "<|reserved_special_token_143|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128152": { + "content": "<|reserved_special_token_144|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128153": { + "content": "<|reserved_special_token_145|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128154": { + "content": "<|reserved_special_token_146|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128155": { + "content": "<|reserved_special_token_147|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128156": { + "content": "<|reserved_special_token_148|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128157": { + "content": "<|reserved_special_token_149|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128158": { + "content": "<|reserved_special_token_150|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128159": { + "content": "<|reserved_special_token_151|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128160": { + "content": "<|reserved_special_token_152|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128161": { + "content": "<|reserved_special_token_153|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128162": { + "content": "<|reserved_special_token_154|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128163": { + "content": "<|reserved_special_token_155|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128164": { + "content": "<|reserved_special_token_156|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128165": { + "content": "<|reserved_special_token_157|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128166": { + "content": "<|reserved_special_token_158|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128167": { + "content": "<|reserved_special_token_159|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128168": { + "content": "<|reserved_special_token_160|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128169": { + "content": "<|reserved_special_token_161|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128170": { + "content": "<|reserved_special_token_162|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128171": { + "content": "<|reserved_special_token_163|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128172": { + "content": "<|reserved_special_token_164|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128173": { + "content": "<|reserved_special_token_165|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128174": { + "content": "<|reserved_special_token_166|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128175": { + "content": "<|reserved_special_token_167|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128176": { + "content": "<|reserved_special_token_168|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128177": { + "content": "<|reserved_special_token_169|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128178": { + "content": "<|reserved_special_token_170|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128179": { + "content": "<|reserved_special_token_171|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128180": { + "content": "<|reserved_special_token_172|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128181": { + "content": "<|reserved_special_token_173|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128182": { + "content": "<|reserved_special_token_174|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128183": { + "content": "<|reserved_special_token_175|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128184": { + "content": "<|reserved_special_token_176|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128185": { + "content": "<|reserved_special_token_177|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128186": { + "content": "<|reserved_special_token_178|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128187": { + "content": "<|reserved_special_token_179|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128188": { + "content": "<|reserved_special_token_180|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128189": { + "content": "<|reserved_special_token_181|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128190": { + "content": "<|reserved_special_token_182|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128191": { + "content": "<|reserved_special_token_183|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128192": { + "content": "<|reserved_special_token_184|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128193": { + "content": "<|reserved_special_token_185|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128194": { + "content": "<|reserved_special_token_186|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128195": { + "content": "<|reserved_special_token_187|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128196": { + "content": "<|reserved_special_token_188|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128197": { + "content": "<|reserved_special_token_189|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128198": { + "content": "<|reserved_special_token_190|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128199": { + "content": "<|reserved_special_token_191|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128200": { + "content": "<|reserved_special_token_192|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128201": { + "content": "<|reserved_special_token_193|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128202": { + "content": "<|reserved_special_token_194|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128203": { + "content": "<|reserved_special_token_195|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128204": { + "content": "<|reserved_special_token_196|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128205": { + "content": "<|reserved_special_token_197|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128206": { + "content": "<|reserved_special_token_198|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128207": { + "content": "<|reserved_special_token_199|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128208": { + "content": "<|reserved_special_token_200|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128209": { + "content": "<|reserved_special_token_201|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128210": { + "content": "<|reserved_special_token_202|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128211": { + "content": "<|reserved_special_token_203|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128212": { + "content": "<|reserved_special_token_204|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128213": { + "content": "<|reserved_special_token_205|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128214": { + "content": "<|reserved_special_token_206|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128215": { + "content": "<|reserved_special_token_207|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128216": { + "content": "<|reserved_special_token_208|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128217": { + "content": "<|reserved_special_token_209|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128218": { + "content": "<|reserved_special_token_210|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128219": { + "content": "<|reserved_special_token_211|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128220": { + "content": "<|reserved_special_token_212|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128221": { + "content": "<|reserved_special_token_213|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128222": { + "content": "<|reserved_special_token_214|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128223": { + "content": "<|reserved_special_token_215|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128224": { + "content": "<|reserved_special_token_216|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128225": { + "content": "<|reserved_special_token_217|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128226": { + "content": "<|reserved_special_token_218|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128227": { + "content": "<|reserved_special_token_219|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128228": { + "content": "<|reserved_special_token_220|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128229": { + "content": "<|reserved_special_token_221|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128230": { + "content": "<|reserved_special_token_222|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128231": { + "content": "<|reserved_special_token_223|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128232": { + "content": "<|reserved_special_token_224|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128233": { + "content": "<|reserved_special_token_225|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128234": { + "content": "<|reserved_special_token_226|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128235": { + "content": "<|reserved_special_token_227|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128236": { + "content": "<|reserved_special_token_228|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128237": { + "content": "<|reserved_special_token_229|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128238": { + "content": "<|reserved_special_token_230|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128239": { + "content": "<|reserved_special_token_231|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128240": { + "content": "<|reserved_special_token_232|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128241": { + "content": "<|reserved_special_token_233|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128242": { + "content": "<|reserved_special_token_234|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128243": { + "content": "<|reserved_special_token_235|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128244": { + "content": "<|reserved_special_token_236|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128245": { + "content": "<|reserved_special_token_237|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128246": { + "content": "<|reserved_special_token_238|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128247": { + "content": "<|reserved_special_token_239|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128248": { + "content": "<|reserved_special_token_240|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128249": { + "content": "<|reserved_special_token_241|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128250": { + "content": "<|reserved_special_token_242|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128251": { + "content": "<|reserved_special_token_243|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128252": { + "content": "<|reserved_special_token_244|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128253": { + "content": "<|reserved_special_token_245|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128254": { + "content": "<|reserved_special_token_246|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128255": { + "content": "<|reserved_special_token_247|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + } + }, + "bos_token": "<|begin_of_text|>", + "chat_template": "{{- bos_token }}\n{%- if custom_tools is defined %}\n {%- set tools = custom_tools %}\n{%- endif %}\n{%- if not tools_in_user_message is defined %}\n {%- set tools_in_user_message = true %}\n{%- endif %}\n{%- if not date_string is defined %}\n {%- if strftime_now is defined %}\n {%- set date_string = strftime_now(\"%d %b %Y\") %}\n {%- else %}\n {%- set date_string = \"26 Jul 2024\" %}\n {%- endif %}\n{%- endif %}\n{%- if not tools is defined %}\n {%- set tools = none %}\n{%- endif %}\n\n{#- This block extracts the system message, so we can slot it into the right place. #}\n{%- if messages[0]['role'] == 'system' %}\n {%- set system_message = messages[0]['content']|trim %}\n {%- set messages = messages[1:] %}\n{%- else %}\n {%- set system_message = \"\" %}\n{%- endif %}\n\n{#- System message #}\n{{- \"<|start_header_id|>system<|end_header_id|>\\n\\n\" }}\n{%- if tools is not none %}\n {{- \"Environment: ipython\\n\" }}\n{%- endif %}\n{{- \"Cutting Knowledge Date: December 2023\\n\" }}\n{{- \"Today Date: \" + date_string + \"\\n\\n\" }}\n{%- if tools is not none and not tools_in_user_message %}\n {{- \"You have access to the following functions. To call a function, please respond with JSON for a function call.\" }}\n {{- 'Respond in the format {\"name\": function name, \"parameters\": dictionary of argument name and its value}.' }}\n {{- \"Do not use variables.\\n\\n\" }}\n {%- for t in tools %}\n {{- t | tojson(indent=4) }}\n {{- \"\\n\\n\" }}\n {%- endfor %}\n{%- endif %}\n{{- system_message }}\n{{- \"<|eot_id|>\" }}\n\n{#- Custom tools are passed in a user message with some extra guidance #}\n{%- if tools_in_user_message and not tools is none %}\n {#- Extract the first user message so we can plug it in here #}\n {%- if messages | length != 0 %}\n {%- set first_user_message = messages[0]['content']|trim %}\n {%- set messages = messages[1:] %}\n {%- else %}\n {{- raise_exception(\"Cannot put tools in the first user message when there's no first user message!\") }}\n{%- endif %}\n {{- '<|start_header_id|>user<|end_header_id|>\\n\\n' -}}\n {{- \"Given the following functions, please respond with a JSON for a function call \" }}\n {{- \"with its proper arguments that best answers the given prompt.\\n\\n\" }}\n {{- 'Respond in the format {\"name\": function name, \"parameters\": dictionary of argument name and its value}.' }}\n {{- \"Do not use variables.\\n\\n\" }}\n {%- for t in tools %}\n {{- t | tojson(indent=4) }}\n {{- \"\\n\\n\" }}\n {%- endfor %}\n {{- first_user_message + \"<|eot_id|>\"}}\n{%- endif %}\n\n{%- for message in messages %}\n {%- if not (message.role == 'ipython' or message.role == 'tool' or 'tool_calls' in message) %}\n {{- '<|start_header_id|>' + message['role'] + '<|end_header_id|>\\n\\n'+ message['content'] | trim + '<|eot_id|>' }}\n {%- elif 'tool_calls' in message %}\n {%- if not message.tool_calls|length == 1 %}\n {{- raise_exception(\"This model only supports single tool-calls at once!\") }}\n {%- endif %}\n {%- set tool_call = message.tool_calls[0].function %}\n {{- '<|start_header_id|>assistant<|end_header_id|>\\n\\n' -}}\n {{- '{\"name\": \"' + tool_call.name + '\", ' }}\n {{- '\"parameters\": ' }}\n {{- tool_call.arguments | tojson }}\n {{- \"}\" }}\n {{- \"<|eot_id|>\" }}\n {%- elif message.role == \"tool\" or message.role == \"ipython\" %}\n {{- \"<|start_header_id|>ipython<|end_header_id|>\\n\\n\" }}\n {%- if message.content is mapping or message.content is iterable %}\n {{- message.content | tojson }}\n {%- else %}\n {{- message.content }}\n {%- endif %}\n {{- \"<|eot_id|>\" }}\n {%- endif %}\n{%- endfor %}\n{%- if add_generation_prompt %}\n {{- '<|start_header_id|>assistant<|end_header_id|>\\n\\n' }}\n{%- endif %}\n", + "clean_up_tokenization_spaces": true, + "eos_token": "<|eot_id|>", + "model_input_names": [ + "input_ids", + "attention_mask" + ], + "model_max_length": 2048, + "pad_token": "<|eot_id|>", + "tokenizer_class": "PreTrainedTokenizerFast" +} diff --git a/checkpoint-448/trainer_state.json b/checkpoint-448/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..c315bba437ee095f81ecbf869efb6a6fe4b8ebc5 --- /dev/null +++ b/checkpoint-448/trainer_state.json @@ -0,0 +1,6753 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 0.947208457218368, + "eval_steps": 500, + "global_step": 448, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.002114304592005286, + "grad_norm": 1.5018059015274048, + "learning_rate": 2.083333333333333e-08, + "logits/chosen": -0.3466828167438507, + "logits/rejected": -0.30099987983703613, + "logps/chosen": -0.9345186948776245, + "logps/rejected": -0.9117153882980347, + "loss": 1.4889, + "rewards/accuracies": 0.53125, + "rewards/chosen": -1.869037389755249, + "rewards/margins": -0.04560665041208267, + "rewards/rejected": -1.8234307765960693, + "step": 1 + }, + { + "epoch": 0.004228609184010572, + "grad_norm": 0.8093975186347961, + "learning_rate": 4.166666666666666e-08, + "logits/chosen": -0.4310421049594879, + "logits/rejected": -0.39132067561149597, + "logps/chosen": -0.8198825716972351, + "logps/rejected": -0.8644211888313293, + "loss": 1.376, + "rewards/accuracies": 0.515625, + "rewards/chosen": -1.6397651433944702, + "rewards/margins": 0.08907715976238251, + "rewards/rejected": -1.7288423776626587, + "step": 2 + }, + { + "epoch": 0.006342913776015857, + "grad_norm": 0.5377389788627625, + "learning_rate": 6.25e-08, + "logits/chosen": -0.46692028641700745, + "logits/rejected": -0.4649256467819214, + "logps/chosen": -0.9087910652160645, + "logps/rejected": -0.9648240804672241, + "loss": 1.3404, + "rewards/accuracies": 0.5703125, + "rewards/chosen": -1.817582130432129, + "rewards/margins": 0.11206617206335068, + "rewards/rejected": -1.9296481609344482, + "step": 3 + }, + { + "epoch": 0.008457218368021144, + "grad_norm": 0.3221875727176666, + "learning_rate": 8.333333333333333e-08, + "logits/chosen": -0.416828453540802, + "logits/rejected": -0.3584724962711334, + "logps/chosen": -0.7818898558616638, + "logps/rejected": -0.8170815110206604, + "loss": 1.3806, + "rewards/accuracies": 0.484375, + "rewards/chosen": -1.5637797117233276, + "rewards/margins": 0.07038339227437973, + "rewards/rejected": -1.6341630220413208, + "step": 4 + }, + { + "epoch": 0.010571522960026428, + "grad_norm": 0.64655601978302, + "learning_rate": 1.0416666666666667e-07, + "logits/chosen": -0.376886248588562, + "logits/rejected": -0.3516141474246979, + "logps/chosen": -0.8814125061035156, + "logps/rejected": -1.0214396715164185, + "loss": 1.2741, + "rewards/accuracies": 0.625, + "rewards/chosen": -1.7628250122070312, + "rewards/margins": 0.28005433082580566, + "rewards/rejected": -2.042879343032837, + "step": 5 + }, + { + "epoch": 0.012685827552031714, + "grad_norm": 0.4775894582271576, + "learning_rate": 1.25e-07, + "logits/chosen": -0.4757865369319916, + "logits/rejected": -0.4498941898345947, + "logps/chosen": -0.8962199687957764, + "logps/rejected": -0.9462199807167053, + "loss": 1.364, + "rewards/accuracies": 0.5625, + "rewards/chosen": -1.7924399375915527, + "rewards/margins": 0.10000008344650269, + "rewards/rejected": -1.8924399614334106, + "step": 6 + }, + { + "epoch": 0.014800132144037, + "grad_norm": 1.2459568977355957, + "learning_rate": 1.4583333333333335e-07, + "logits/chosen": -0.38895344734191895, + "logits/rejected": -0.38165366649627686, + "logps/chosen": -0.9025766253471375, + "logps/rejected": -0.9465017318725586, + "loss": 1.3898, + "rewards/accuracies": 0.59375, + "rewards/chosen": -1.805153250694275, + "rewards/margins": 0.0878501906991005, + "rewards/rejected": -1.8930034637451172, + "step": 7 + }, + { + "epoch": 0.016914436736042288, + "grad_norm": 0.6195729374885559, + "learning_rate": 1.6666666666666665e-07, + "logits/chosen": -0.3964853286743164, + "logits/rejected": -0.377862811088562, + "logps/chosen": -0.9054160118103027, + "logps/rejected": -0.9605879187583923, + "loss": 1.3821, + "rewards/accuracies": 0.4765625, + "rewards/chosen": -1.8108320236206055, + "rewards/margins": 0.1103438138961792, + "rewards/rejected": -1.9211758375167847, + "step": 8 + }, + { + "epoch": 0.019028741328047574, + "grad_norm": 1.2074137926101685, + "learning_rate": 1.875e-07, + "logits/chosen": -0.3729037344455719, + "logits/rejected": -0.38143450021743774, + "logps/chosen": -0.9328653216362, + "logps/rejected": -0.9905799627304077, + "loss": 1.3754, + "rewards/accuracies": 0.5078125, + "rewards/chosen": -1.8657306432724, + "rewards/margins": 0.11542946100234985, + "rewards/rejected": -1.9811599254608154, + "step": 9 + }, + { + "epoch": 0.021143045920052856, + "grad_norm": 0.2867220640182495, + "learning_rate": 2.0833333333333333e-07, + "logits/chosen": -0.4263336658477783, + "logits/rejected": -0.42903271317481995, + "logps/chosen": -0.8979260325431824, + "logps/rejected": -0.9078099727630615, + "loss": 1.4438, + "rewards/accuracies": 0.515625, + "rewards/chosen": -1.7958520650863647, + "rewards/margins": 0.019767940044403076, + "rewards/rejected": -1.815619945526123, + "step": 10 + }, + { + "epoch": 0.023257350512058142, + "grad_norm": 0.8363026976585388, + "learning_rate": 2.2916666666666663e-07, + "logits/chosen": -0.3374914526939392, + "logits/rejected": -0.32399696111679077, + "logps/chosen": -0.8886098861694336, + "logps/rejected": -0.9484556317329407, + "loss": 1.3422, + "rewards/accuracies": 0.546875, + "rewards/chosen": -1.7772197723388672, + "rewards/margins": 0.11969132721424103, + "rewards/rejected": -1.8969112634658813, + "step": 11 + }, + { + "epoch": 0.025371655104063428, + "grad_norm": 0.5406804084777832, + "learning_rate": 2.5e-07, + "logits/chosen": -0.42844679951667786, + "logits/rejected": -0.37984615564346313, + "logps/chosen": -0.861629843711853, + "logps/rejected": -0.8968492150306702, + "loss": 1.3922, + "rewards/accuracies": 0.5, + "rewards/chosen": -1.723259687423706, + "rewards/margins": 0.07043875753879547, + "rewards/rejected": -1.7936984300613403, + "step": 12 + }, + { + "epoch": 0.027485959696068714, + "grad_norm": 0.9919329285621643, + "learning_rate": 2.708333333333333e-07, + "logits/chosen": -0.36495402455329895, + "logits/rejected": -0.3249490261077881, + "logps/chosen": -0.8502095937728882, + "logps/rejected": -0.8470643758773804, + "loss": 1.4334, + "rewards/accuracies": 0.5078125, + "rewards/chosen": -1.7004191875457764, + "rewards/margins": -0.006290358491241932, + "rewards/rejected": -1.6941287517547607, + "step": 13 + }, + { + "epoch": 0.029600264288074, + "grad_norm": 0.5477162003517151, + "learning_rate": 2.916666666666667e-07, + "logits/chosen": -0.4155704081058502, + "logits/rejected": -0.39535820484161377, + "logps/chosen": -1.0430240631103516, + "logps/rejected": -1.1318373680114746, + "loss": 1.3533, + "rewards/accuracies": 0.6015625, + "rewards/chosen": -2.086048126220703, + "rewards/margins": 0.17762640118598938, + "rewards/rejected": -2.263674736022949, + "step": 14 + }, + { + "epoch": 0.031714568880079286, + "grad_norm": 0.26530712842941284, + "learning_rate": 3.1249999999999997e-07, + "logits/chosen": -0.4810572564601898, + "logits/rejected": -0.42454615235328674, + "logps/chosen": -0.8741041421890259, + "logps/rejected": -0.9494178295135498, + "loss": 1.3655, + "rewards/accuracies": 0.5, + "rewards/chosen": -1.7482082843780518, + "rewards/margins": 0.15062758326530457, + "rewards/rejected": -1.8988356590270996, + "step": 15 + }, + { + "epoch": 0.033828873472084575, + "grad_norm": 0.9272629618644714, + "learning_rate": 3.333333333333333e-07, + "logits/chosen": -0.4440098702907562, + "logits/rejected": -0.3930297791957855, + "logps/chosen": -0.8473359942436218, + "logps/rejected": -0.9369213581085205, + "loss": 1.3248, + "rewards/accuracies": 0.5625, + "rewards/chosen": -1.6946719884872437, + "rewards/margins": 0.17917080223560333, + "rewards/rejected": -1.873842716217041, + "step": 16 + }, + { + "epoch": 0.03594317806408986, + "grad_norm": 0.5912418961524963, + "learning_rate": 3.541666666666667e-07, + "logits/chosen": -0.3838099539279938, + "logits/rejected": -0.3507584035396576, + "logps/chosen": -0.8888350129127502, + "logps/rejected": -0.9361770749092102, + "loss": 1.383, + "rewards/accuracies": 0.4921875, + "rewards/chosen": -1.7776700258255005, + "rewards/margins": 0.0946839451789856, + "rewards/rejected": -1.8723541498184204, + "step": 17 + }, + { + "epoch": 0.03805748265609515, + "grad_norm": 0.6536504030227661, + "learning_rate": 3.75e-07, + "logits/chosen": -0.3581697940826416, + "logits/rejected": -0.3620460629463196, + "logps/chosen": -0.8519617319107056, + "logps/rejected": -0.9022184610366821, + "loss": 1.3841, + "rewards/accuracies": 0.5078125, + "rewards/chosen": -1.7039234638214111, + "rewards/margins": 0.10051343590021133, + "rewards/rejected": -1.8044369220733643, + "step": 18 + }, + { + "epoch": 0.04017178724810043, + "grad_norm": 0.3433632552623749, + "learning_rate": 3.958333333333333e-07, + "logits/chosen": -0.37887442111968994, + "logits/rejected": -0.37543320655822754, + "logps/chosen": -0.9464104175567627, + "logps/rejected": -1.0017329454421997, + "loss": 1.3649, + "rewards/accuracies": 0.5703125, + "rewards/chosen": -1.8928208351135254, + "rewards/margins": 0.11064518243074417, + "rewards/rejected": -2.0034658908843994, + "step": 19 + }, + { + "epoch": 0.04228609184010571, + "grad_norm": 0.9764007329940796, + "learning_rate": 4.1666666666666667e-07, + "logits/chosen": -0.44110679626464844, + "logits/rejected": -0.4280649721622467, + "logps/chosen": -0.9046768546104431, + "logps/rejected": -1.0464633703231812, + "loss": 1.2592, + "rewards/accuracies": 0.625, + "rewards/chosen": -1.8093537092208862, + "rewards/margins": 0.2835729420185089, + "rewards/rejected": -2.0929267406463623, + "step": 20 + }, + { + "epoch": 0.044400396432111, + "grad_norm": 1.8563830852508545, + "learning_rate": 4.375e-07, + "logits/chosen": -0.45183491706848145, + "logits/rejected": -0.42935287952423096, + "logps/chosen": -0.9043138027191162, + "logps/rejected": -0.9462392926216125, + "loss": 1.3784, + "rewards/accuracies": 0.5625, + "rewards/chosen": -1.8086276054382324, + "rewards/margins": 0.08385094255208969, + "rewards/rejected": -1.892478585243225, + "step": 21 + }, + { + "epoch": 0.046514701024116284, + "grad_norm": 1.3473299741744995, + "learning_rate": 4.5833333333333327e-07, + "logits/chosen": -0.37855517864227295, + "logits/rejected": -0.34429043531417847, + "logps/chosen": -0.9284683465957642, + "logps/rejected": -0.9454050064086914, + "loss": 1.4346, + "rewards/accuracies": 0.5234375, + "rewards/chosen": -1.8569366931915283, + "rewards/margins": 0.03387312963604927, + "rewards/rejected": -1.8908100128173828, + "step": 22 + }, + { + "epoch": 0.04862900561612157, + "grad_norm": 0.940831184387207, + "learning_rate": 4.791666666666667e-07, + "logits/chosen": -0.39172160625457764, + "logits/rejected": -0.3695780634880066, + "logps/chosen": -0.9314202666282654, + "logps/rejected": -1.020229697227478, + "loss": 1.3322, + "rewards/accuracies": 0.5703125, + "rewards/chosen": -1.8628405332565308, + "rewards/margins": 0.17761869728565216, + "rewards/rejected": -2.040459394454956, + "step": 23 + }, + { + "epoch": 0.050743310208126856, + "grad_norm": 0.5783158540725708, + "learning_rate": 5e-07, + "logits/chosen": -0.4958629608154297, + "logits/rejected": -0.4257377088069916, + "logps/chosen": -0.9379237294197083, + "logps/rejected": -0.9415461421012878, + "loss": 1.441, + "rewards/accuracies": 0.515625, + "rewards/chosen": -1.8758474588394165, + "rewards/margins": 0.0072449808940291405, + "rewards/rejected": -1.8830922842025757, + "step": 24 + }, + { + "epoch": 0.052857614800132145, + "grad_norm": 1.4209853410720825, + "learning_rate": 5.208333333333334e-07, + "logits/chosen": -0.36407172679901123, + "logits/rejected": -0.3331725299358368, + "logps/chosen": -0.9192589521408081, + "logps/rejected": -0.9595308899879456, + "loss": 1.3994, + "rewards/accuracies": 0.5078125, + "rewards/chosen": -1.8385179042816162, + "rewards/margins": 0.080544114112854, + "rewards/rejected": -1.9190617799758911, + "step": 25 + }, + { + "epoch": 0.05497191939213743, + "grad_norm": 0.6310216188430786, + "learning_rate": 5.416666666666666e-07, + "logits/chosen": -0.41772690415382385, + "logits/rejected": -0.36565953493118286, + "logps/chosen": -0.8052878379821777, + "logps/rejected": -0.8673746585845947, + "loss": 1.3356, + "rewards/accuracies": 0.5859375, + "rewards/chosen": -1.6105756759643555, + "rewards/margins": 0.12417369335889816, + "rewards/rejected": -1.7347493171691895, + "step": 26 + }, + { + "epoch": 0.05708622398414272, + "grad_norm": 1.2933462858200073, + "learning_rate": 5.625e-07, + "logits/chosen": -0.4482795000076294, + "logits/rejected": -0.39409321546554565, + "logps/chosen": -0.8339261412620544, + "logps/rejected": -0.8675202131271362, + "loss": 1.3739, + "rewards/accuracies": 0.4921875, + "rewards/chosen": -1.6678522825241089, + "rewards/margins": 0.06718815863132477, + "rewards/rejected": -1.7350404262542725, + "step": 27 + }, + { + "epoch": 0.059200528576148, + "grad_norm": 0.5808025002479553, + "learning_rate": 5.833333333333334e-07, + "logits/chosen": -0.37116044759750366, + "logits/rejected": -0.3478051722049713, + "logps/chosen": -0.8950318694114685, + "logps/rejected": -0.9756672978401184, + "loss": 1.3505, + "rewards/accuracies": 0.5625, + "rewards/chosen": -1.790063738822937, + "rewards/margins": 0.1612708568572998, + "rewards/rejected": -1.9513345956802368, + "step": 28 + }, + { + "epoch": 0.06131483316815329, + "grad_norm": 1.0569533109664917, + "learning_rate": 6.041666666666666e-07, + "logits/chosen": -0.421148419380188, + "logits/rejected": -0.38443076610565186, + "logps/chosen": -0.8021283745765686, + "logps/rejected": -0.8370179533958435, + "loss": 1.3916, + "rewards/accuracies": 0.46875, + "rewards/chosen": -1.6042567491531372, + "rewards/margins": 0.06977920234203339, + "rewards/rejected": -1.674035906791687, + "step": 29 + }, + { + "epoch": 0.06342913776015857, + "grad_norm": 0.42577147483825684, + "learning_rate": 6.249999999999999e-07, + "logits/chosen": -0.4429818391799927, + "logits/rejected": -0.3524704575538635, + "logps/chosen": -0.8916822671890259, + "logps/rejected": -0.8985542058944702, + "loss": 1.4321, + "rewards/accuracies": 0.5234375, + "rewards/chosen": -1.7833645343780518, + "rewards/margins": 0.01374388113617897, + "rewards/rejected": -1.7971084117889404, + "step": 30 + }, + { + "epoch": 0.06554344235216386, + "grad_norm": 1.0056904554367065, + "learning_rate": 6.458333333333333e-07, + "logits/chosen": -0.376451700925827, + "logits/rejected": -0.342519074678421, + "logps/chosen": -0.9038617014884949, + "logps/rejected": -0.953092634677887, + "loss": 1.398, + "rewards/accuracies": 0.4765625, + "rewards/chosen": -1.8077234029769897, + "rewards/margins": 0.09846188127994537, + "rewards/rejected": -1.906185269355774, + "step": 31 + }, + { + "epoch": 0.06765774694416915, + "grad_norm": 0.5494012236595154, + "learning_rate": 6.666666666666666e-07, + "logits/chosen": -0.3459138870239258, + "logits/rejected": -0.3590989410877228, + "logps/chosen": -0.8274999260902405, + "logps/rejected": -0.8776509761810303, + "loss": 1.363, + "rewards/accuracies": 0.5625, + "rewards/chosen": -1.654999852180481, + "rewards/margins": 0.1003020703792572, + "rewards/rejected": -1.7553019523620605, + "step": 32 + }, + { + "epoch": 0.06977205153617443, + "grad_norm": 0.693267822265625, + "learning_rate": 6.875e-07, + "logits/chosen": -0.40053680539131165, + "logits/rejected": -0.37323904037475586, + "logps/chosen": -0.8255244493484497, + "logps/rejected": -0.8658804893493652, + "loss": 1.3712, + "rewards/accuracies": 0.578125, + "rewards/chosen": -1.6510488986968994, + "rewards/margins": 0.08071210980415344, + "rewards/rejected": -1.7317609786987305, + "step": 33 + }, + { + "epoch": 0.07188635612817972, + "grad_norm": 2.213238000869751, + "learning_rate": 7.083333333333334e-07, + "logits/chosen": -0.40097948908805847, + "logits/rejected": -0.38190510869026184, + "logps/chosen": -0.9122671484947205, + "logps/rejected": -0.9549552798271179, + "loss": 1.36, + "rewards/accuracies": 0.5390625, + "rewards/chosen": -1.824534296989441, + "rewards/margins": 0.0853763073682785, + "rewards/rejected": -1.9099105596542358, + "step": 34 + }, + { + "epoch": 0.074000660720185, + "grad_norm": 0.6859830021858215, + "learning_rate": 7.291666666666666e-07, + "logits/chosen": -0.42501094937324524, + "logits/rejected": -0.42549416422843933, + "logps/chosen": -1.0008373260498047, + "logps/rejected": -1.1157118082046509, + "loss": 1.3294, + "rewards/accuracies": 0.5546875, + "rewards/chosen": -2.0016746520996094, + "rewards/margins": 0.22974897921085358, + "rewards/rejected": -2.2314236164093018, + "step": 35 + }, + { + "epoch": 0.0761149653121903, + "grad_norm": 0.6468721628189087, + "learning_rate": 7.5e-07, + "logits/chosen": -0.36494994163513184, + "logits/rejected": -0.30433908104896545, + "logps/chosen": -0.9062094688415527, + "logps/rejected": -0.920263409614563, + "loss": 1.4312, + "rewards/accuracies": 0.4921875, + "rewards/chosen": -1.8124189376831055, + "rewards/margins": 0.02810765616595745, + "rewards/rejected": -1.840526819229126, + "step": 36 + }, + { + "epoch": 0.07822926990419557, + "grad_norm": 0.5085556507110596, + "learning_rate": 7.708333333333333e-07, + "logits/chosen": -0.4677881598472595, + "logits/rejected": -0.456132709980011, + "logps/chosen": -1.0101865530014038, + "logps/rejected": -1.0429682731628418, + "loss": 1.4132, + "rewards/accuracies": 0.5234375, + "rewards/chosen": -2.0203731060028076, + "rewards/margins": 0.06556359678506851, + "rewards/rejected": -2.0859365463256836, + "step": 37 + }, + { + "epoch": 0.08034357449620086, + "grad_norm": 0.23813335597515106, + "learning_rate": 7.916666666666666e-07, + "logits/chosen": -0.3991190791130066, + "logits/rejected": -0.3664044141769409, + "logps/chosen": -0.9578174352645874, + "logps/rejected": -0.9229263067245483, + "loss": 1.4824, + "rewards/accuracies": 0.4609375, + "rewards/chosen": -1.9156348705291748, + "rewards/margins": -0.06978224962949753, + "rewards/rejected": -1.8458526134490967, + "step": 38 + }, + { + "epoch": 0.08245787908820615, + "grad_norm": 0.587037980556488, + "learning_rate": 8.125e-07, + "logits/chosen": -0.37554049491882324, + "logits/rejected": -0.36305734515190125, + "logps/chosen": -0.8503091931343079, + "logps/rejected": -0.864615261554718, + "loss": 1.4086, + "rewards/accuracies": 0.4296875, + "rewards/chosen": -1.7006183862686157, + "rewards/margins": 0.028611989691853523, + "rewards/rejected": -1.729230523109436, + "step": 39 + }, + { + "epoch": 0.08457218368021142, + "grad_norm": 0.4172501862049103, + "learning_rate": 8.333333333333333e-07, + "logits/chosen": -0.4405443072319031, + "logits/rejected": -0.41723060607910156, + "logps/chosen": -0.8502858877182007, + "logps/rejected": -0.9114271402359009, + "loss": 1.3446, + "rewards/accuracies": 0.546875, + "rewards/chosen": -1.7005717754364014, + "rewards/margins": 0.12228240817785263, + "rewards/rejected": -1.8228542804718018, + "step": 40 + }, + { + "epoch": 0.08668648827221671, + "grad_norm": 0.9275372624397278, + "learning_rate": 8.541666666666666e-07, + "logits/chosen": -0.4200601577758789, + "logits/rejected": -0.3478623628616333, + "logps/chosen": -0.892408013343811, + "logps/rejected": -0.9276402592658997, + "loss": 1.3887, + "rewards/accuracies": 0.46875, + "rewards/chosen": -1.784816026687622, + "rewards/margins": 0.07046431303024292, + "rewards/rejected": -1.8552805185317993, + "step": 41 + }, + { + "epoch": 0.088800792864222, + "grad_norm": 0.7317383289337158, + "learning_rate": 8.75e-07, + "logits/chosen": -0.37675267457962036, + "logits/rejected": -0.33540332317352295, + "logps/chosen": -0.7866061925888062, + "logps/rejected": -0.824250340461731, + "loss": 1.3837, + "rewards/accuracies": 0.5078125, + "rewards/chosen": -1.5732123851776123, + "rewards/margins": 0.07528844475746155, + "rewards/rejected": -1.648500680923462, + "step": 42 + }, + { + "epoch": 0.09091509745622729, + "grad_norm": 0.9452736973762512, + "learning_rate": 8.958333333333334e-07, + "logits/chosen": -0.4662383198738098, + "logits/rejected": -0.4447881579399109, + "logps/chosen": -0.9490666389465332, + "logps/rejected": -1.0112388134002686, + "loss": 1.3412, + "rewards/accuracies": 0.5390625, + "rewards/chosen": -1.8981332778930664, + "rewards/margins": 0.12434446066617966, + "rewards/rejected": -2.022477626800537, + "step": 43 + }, + { + "epoch": 0.09302940204823257, + "grad_norm": 0.2848323881626129, + "learning_rate": 9.166666666666665e-07, + "logits/chosen": -0.41404005885124207, + "logits/rejected": -0.3944583535194397, + "logps/chosen": -0.8224930167198181, + "logps/rejected": -0.8416361808776855, + "loss": 1.4027, + "rewards/accuracies": 0.609375, + "rewards/chosen": -1.6449860334396362, + "rewards/margins": 0.038286346942186356, + "rewards/rejected": -1.683272361755371, + "step": 44 + }, + { + "epoch": 0.09514370664023786, + "grad_norm": 0.7165678143501282, + "learning_rate": 9.374999999999999e-07, + "logits/chosen": -0.40475326776504517, + "logits/rejected": -0.3559921383857727, + "logps/chosen": -0.8070214986801147, + "logps/rejected": -0.8993593454360962, + "loss": 1.3148, + "rewards/accuracies": 0.53125, + "rewards/chosen": -1.6140429973602295, + "rewards/margins": 0.18467575311660767, + "rewards/rejected": -1.7987186908721924, + "step": 45 + }, + { + "epoch": 0.09725801123224315, + "grad_norm": 0.4779021739959717, + "learning_rate": 9.583333333333334e-07, + "logits/chosen": -0.4171525835990906, + "logits/rejected": -0.42166149616241455, + "logps/chosen": -0.7872560024261475, + "logps/rejected": -0.8496187925338745, + "loss": 1.3356, + "rewards/accuracies": 0.5546875, + "rewards/chosen": -1.574512004852295, + "rewards/margins": 0.12472567707300186, + "rewards/rejected": -1.699237585067749, + "step": 46 + }, + { + "epoch": 0.09937231582424844, + "grad_norm": 0.7870219349861145, + "learning_rate": 9.791666666666667e-07, + "logits/chosen": -0.3734116554260254, + "logits/rejected": -0.32778748869895935, + "logps/chosen": -0.7842286825180054, + "logps/rejected": -0.8161548972129822, + "loss": 1.3647, + "rewards/accuracies": 0.5078125, + "rewards/chosen": -1.5684573650360107, + "rewards/margins": 0.06385258585214615, + "rewards/rejected": -1.6323097944259644, + "step": 47 + }, + { + "epoch": 0.10148662041625371, + "grad_norm": 0.2597256600856781, + "learning_rate": 1e-06, + "logits/chosen": -0.4355677664279938, + "logits/rejected": -0.38983187079429626, + "logps/chosen": -0.8787693977355957, + "logps/rejected": -0.9383041262626648, + "loss": 1.35, + "rewards/accuracies": 0.5546875, + "rewards/chosen": -1.7575387954711914, + "rewards/margins": 0.11906948685646057, + "rewards/rejected": -1.8766082525253296, + "step": 48 + }, + { + "epoch": 0.103600925008259, + "grad_norm": 0.9942799210548401, + "learning_rate": 9.999862751990697e-07, + "logits/chosen": -0.4244321882724762, + "logits/rejected": -0.4366786777973175, + "logps/chosen": -0.7910157442092896, + "logps/rejected": -0.8630884885787964, + "loss": 1.3166, + "rewards/accuracies": 0.5546875, + "rewards/chosen": -1.582031488418579, + "rewards/margins": 0.14414538443088531, + "rewards/rejected": -1.7261769771575928, + "step": 49 + }, + { + "epoch": 0.10571522960026429, + "grad_norm": 0.5333903431892395, + "learning_rate": 9.999451015497595e-07, + "logits/chosen": -0.389942467212677, + "logits/rejected": -0.36674585938453674, + "logps/chosen": -0.7312074899673462, + "logps/rejected": -0.7289648652076721, + "loss": 1.4225, + "rewards/accuracies": 0.4765625, + "rewards/chosen": -1.4624149799346924, + "rewards/margins": -0.004485193639993668, + "rewards/rejected": -1.4579297304153442, + "step": 50 + }, + { + "epoch": 0.10782953419226958, + "grad_norm": 0.5712242722511292, + "learning_rate": 9.9987648131247e-07, + "logits/chosen": -0.4622853994369507, + "logits/rejected": -0.3728552460670471, + "logps/chosen": -0.8764299750328064, + "logps/rejected": -0.869678795337677, + "loss": 1.4542, + "rewards/accuracies": 0.53125, + "rewards/chosen": -1.7528599500656128, + "rewards/margins": -0.013502337038516998, + "rewards/rejected": -1.739357590675354, + "step": 51 + }, + { + "epoch": 0.10994383878427486, + "grad_norm": 0.2586441934108734, + "learning_rate": 9.99780418254397e-07, + "logits/chosen": -0.37249019742012024, + "logits/rejected": -0.3998304605484009, + "logps/chosen": -0.8435611724853516, + "logps/rejected": -0.9359882473945618, + "loss": 1.3057, + "rewards/accuracies": 0.625, + "rewards/chosen": -1.6871223449707031, + "rewards/margins": 0.18485431373119354, + "rewards/rejected": -1.8719764947891235, + "step": 52 + }, + { + "epoch": 0.11205814337628014, + "grad_norm": 1.0829113721847534, + "learning_rate": 9.996569176493268e-07, + "logits/chosen": -0.47697725892066956, + "logits/rejected": -0.4208195209503174, + "logps/chosen": -0.8014968037605286, + "logps/rejected": -0.8703804612159729, + "loss": 1.3523, + "rewards/accuracies": 0.5703125, + "rewards/chosen": -1.6029936075210571, + "rewards/margins": 0.1377674788236618, + "rewards/rejected": -1.7407609224319458, + "step": 53 + }, + { + "epoch": 0.11417244796828543, + "grad_norm": 0.5523208379745483, + "learning_rate": 9.995059862773438e-07, + "logits/chosen": -0.40533363819122314, + "logits/rejected": -0.36801978945732117, + "logps/chosen": -0.7641825675964355, + "logps/rejected": -0.8168596029281616, + "loss": 1.3692, + "rewards/accuracies": 0.53125, + "rewards/chosen": -1.528365135192871, + "rewards/margins": 0.10535416752099991, + "rewards/rejected": -1.6337192058563232, + "step": 54 + }, + { + "epoch": 0.11628675256029072, + "grad_norm": 0.614101767539978, + "learning_rate": 9.993276324244605e-07, + "logits/chosen": -0.4476906955242157, + "logits/rejected": -0.40396648645401, + "logps/chosen": -0.8706808090209961, + "logps/rejected": -0.9221430420875549, + "loss": 1.3787, + "rewards/accuracies": 0.515625, + "rewards/chosen": -1.7413616180419922, + "rewards/margins": 0.10292442888021469, + "rewards/rejected": -1.8442860841751099, + "step": 55 + }, + { + "epoch": 0.118401057152296, + "grad_norm": 0.3428778052330017, + "learning_rate": 9.991218658821608e-07, + "logits/chosen": -0.31709593534469604, + "logits/rejected": -0.2760937213897705, + "logps/chosen": -0.842248797416687, + "logps/rejected": -0.8068034648895264, + "loss": 1.498, + "rewards/accuracies": 0.4375, + "rewards/chosen": -1.684497594833374, + "rewards/margins": -0.07089066505432129, + "rewards/rejected": -1.6136069297790527, + "step": 56 + }, + { + "epoch": 0.12051536174430129, + "grad_norm": 0.6877723932266235, + "learning_rate": 9.988886979468643e-07, + "logits/chosen": -0.41800016164779663, + "logits/rejected": -0.4011584222316742, + "logps/chosen": -0.7845420837402344, + "logps/rejected": -0.834447979927063, + "loss": 1.3491, + "rewards/accuracies": 0.546875, + "rewards/chosen": -1.5690841674804688, + "rewards/margins": 0.09981165081262589, + "rewards/rejected": -1.668895959854126, + "step": 57 + }, + { + "epoch": 0.12262966633630658, + "grad_norm": 0.9649701714515686, + "learning_rate": 9.98628141419305e-07, + "logits/chosen": -0.4253537058830261, + "logits/rejected": -0.4305458962917328, + "logps/chosen": -0.86476731300354, + "logps/rejected": -0.9080386161804199, + "loss": 1.3639, + "rewards/accuracies": 0.515625, + "rewards/chosen": -1.72953462600708, + "rewards/margins": 0.08654248714447021, + "rewards/rejected": -1.8160772323608398, + "step": 58 + }, + { + "epoch": 0.12474397092831185, + "grad_norm": 1.3779780864715576, + "learning_rate": 9.98340210603829e-07, + "logits/chosen": -0.39970022439956665, + "logits/rejected": -0.441428005695343, + "logps/chosen": -0.8662775158882141, + "logps/rejected": -0.9646260738372803, + "loss": 1.3001, + "rewards/accuracies": 0.6484375, + "rewards/chosen": -1.7325550317764282, + "rewards/margins": 0.19669723510742188, + "rewards/rejected": -1.9292521476745605, + "step": 59 + }, + { + "epoch": 0.12685827552031714, + "grad_norm": 0.5366966724395752, + "learning_rate": 9.980249213076084e-07, + "logits/chosen": -0.37770116329193115, + "logits/rejected": -0.35231757164001465, + "logps/chosen": -0.8165755867958069, + "logps/rejected": -0.8619179129600525, + "loss": 1.3699, + "rewards/accuracies": 0.578125, + "rewards/chosen": -1.6331511735916138, + "rewards/margins": 0.09068439900875092, + "rewards/rejected": -1.723835825920105, + "step": 60 + }, + { + "epoch": 0.12897258011232243, + "grad_norm": 0.36810922622680664, + "learning_rate": 9.976822908397748e-07, + "logits/chosen": -0.4224976897239685, + "logits/rejected": -0.41758257150650024, + "logps/chosen": -0.8445641994476318, + "logps/rejected": -0.9393664598464966, + "loss": 1.3193, + "rewards/accuracies": 0.5390625, + "rewards/chosen": -1.6891283988952637, + "rewards/margins": 0.18960458040237427, + "rewards/rejected": -1.8787329196929932, + "step": 61 + }, + { + "epoch": 0.13108688470432772, + "grad_norm": 0.6838279366493225, + "learning_rate": 9.97312338010468e-07, + "logits/chosen": -0.4168627858161926, + "logits/rejected": -0.36115381121635437, + "logps/chosen": -0.8370552659034729, + "logps/rejected": -0.8352169394493103, + "loss": 1.4284, + "rewards/accuracies": 0.5703125, + "rewards/chosen": -1.6741105318069458, + "rewards/margins": -0.0036766715347766876, + "rewards/rejected": -1.6704338788986206, + "step": 62 + }, + { + "epoch": 0.133201189296333, + "grad_norm": 0.39330533146858215, + "learning_rate": 9.969150831298037e-07, + "logits/chosen": -0.4558233618736267, + "logits/rejected": -0.4025765061378479, + "logps/chosen": -0.826255738735199, + "logps/rejected": -0.894213080406189, + "loss": 1.3485, + "rewards/accuracies": 0.5, + "rewards/chosen": -1.652511477470398, + "rewards/margins": 0.13591471314430237, + "rewards/rejected": -1.788426160812378, + "step": 63 + }, + { + "epoch": 0.1353154938883383, + "grad_norm": 0.6055929660797119, + "learning_rate": 9.964905480067584e-07, + "logits/chosen": -0.459463506937027, + "logits/rejected": -0.42943331599235535, + "logps/chosen": -0.7901928424835205, + "logps/rejected": -0.7964221239089966, + "loss": 1.4057, + "rewards/accuracies": 0.484375, + "rewards/chosen": -1.580385684967041, + "rewards/margins": 0.012458762153983116, + "rewards/rejected": -1.5928442478179932, + "step": 64 + }, + { + "epoch": 0.13742979848034356, + "grad_norm": 0.37883859872817993, + "learning_rate": 9.960387559479725e-07, + "logits/chosen": -0.4447207450866699, + "logits/rejected": -0.371269553899765, + "logps/chosen": -0.7863065004348755, + "logps/rejected": -0.7983666658401489, + "loss": 1.4202, + "rewards/accuracies": 0.4609375, + "rewards/chosen": -1.572613000869751, + "rewards/margins": 0.02412020042538643, + "rewards/rejected": -1.5967333316802979, + "step": 65 + }, + { + "epoch": 0.13954410307234885, + "grad_norm": 0.31330156326293945, + "learning_rate": 9.955597317564703e-07, + "logits/chosen": -0.42059677839279175, + "logits/rejected": -0.37605100870132446, + "logps/chosen": -0.7669360637664795, + "logps/rejected": -0.8348797559738159, + "loss": 1.3368, + "rewards/accuracies": 0.5390625, + "rewards/chosen": -1.533872127532959, + "rewards/margins": 0.13588732481002808, + "rewards/rejected": -1.6697595119476318, + "step": 66 + }, + { + "epoch": 0.14165840766435414, + "grad_norm": 0.4353170096874237, + "learning_rate": 9.950535017302983e-07, + "logits/chosen": -0.3897082805633545, + "logits/rejected": -0.38229796290397644, + "logps/chosen": -0.7249190807342529, + "logps/rejected": -0.7696882486343384, + "loss": 1.3511, + "rewards/accuracies": 0.5546875, + "rewards/chosen": -1.4498381614685059, + "rewards/margins": 0.08953814208507538, + "rewards/rejected": -1.5393764972686768, + "step": 67 + }, + { + "epoch": 0.14377271225635943, + "grad_norm": 0.6724106669425964, + "learning_rate": 9.94520093661082e-07, + "logits/chosen": -0.3687596023082733, + "logits/rejected": -0.34222811460494995, + "logps/chosen": -0.7845972776412964, + "logps/rejected": -0.8308086395263672, + "loss": 1.3737, + "rewards/accuracies": 0.5, + "rewards/chosen": -1.5691945552825928, + "rewards/margins": 0.09242270141839981, + "rewards/rejected": -1.6616172790527344, + "step": 68 + }, + { + "epoch": 0.14588701684836472, + "grad_norm": 0.7312172651290894, + "learning_rate": 9.939595368324994e-07, + "logits/chosen": -0.4475817382335663, + "logits/rejected": -0.3975730538368225, + "logps/chosen": -0.7314785718917847, + "logps/rejected": -0.7924487590789795, + "loss": 1.3439, + "rewards/accuracies": 0.5390625, + "rewards/chosen": -1.4629571437835693, + "rewards/margins": 0.12194043397903442, + "rewards/rejected": -1.584897518157959, + "step": 69 + }, + { + "epoch": 0.14800132144037, + "grad_norm": 0.301097571849823, + "learning_rate": 9.933718620186744e-07, + "logits/chosen": -0.402032732963562, + "logits/rejected": -0.3640722632408142, + "logps/chosen": -0.7727882862091064, + "logps/rejected": -0.8291516304016113, + "loss": 1.358, + "rewards/accuracies": 0.5546875, + "rewards/chosen": -1.545576572418213, + "rewards/margins": 0.11272668838500977, + "rewards/rejected": -1.6583032608032227, + "step": 70 + }, + { + "epoch": 0.1501156260323753, + "grad_norm": 0.377835750579834, + "learning_rate": 9.92757101482486e-07, + "logits/chosen": -0.316825270652771, + "logits/rejected": -0.3245603144168854, + "logps/chosen": -0.7962774634361267, + "logps/rejected": -0.8610175848007202, + "loss": 1.3464, + "rewards/accuracies": 0.5859375, + "rewards/chosen": -1.5925549268722534, + "rewards/margins": 0.12948019802570343, + "rewards/rejected": -1.7220351696014404, + "step": 71 + }, + { + "epoch": 0.1522299306243806, + "grad_norm": 0.84058678150177, + "learning_rate": 9.921152889737984e-07, + "logits/chosen": -0.4446060359477997, + "logits/rejected": -0.43160340189933777, + "logps/chosen": -0.7745426297187805, + "logps/rejected": -0.8286185264587402, + "loss": 1.3408, + "rewards/accuracies": 0.5703125, + "rewards/chosen": -1.549085259437561, + "rewards/margins": 0.10815180093050003, + "rewards/rejected": -1.6572370529174805, + "step": 72 + }, + { + "epoch": 0.15434423521638585, + "grad_norm": 0.6970808506011963, + "learning_rate": 9.91446459727607e-07, + "logits/chosen": -0.4220297634601593, + "logits/rejected": -0.404453307390213, + "logps/chosen": -0.8769615888595581, + "logps/rejected": -0.9564313292503357, + "loss": 1.3423, + "rewards/accuracies": 0.6328125, + "rewards/chosen": -1.7539231777191162, + "rewards/margins": 0.15893957018852234, + "rewards/rejected": -1.9128626585006714, + "step": 73 + }, + { + "epoch": 0.15645853980839114, + "grad_norm": 0.5900676250457764, + "learning_rate": 9.90750650462105e-07, + "logits/chosen": -0.41884100437164307, + "logits/rejected": -0.38551777601242065, + "logps/chosen": -0.814996063709259, + "logps/rejected": -0.8892688751220703, + "loss": 1.3325, + "rewards/accuracies": 0.5546875, + "rewards/chosen": -1.629992127418518, + "rewards/margins": 0.14854571223258972, + "rewards/rejected": -1.7785377502441406, + "step": 74 + }, + { + "epoch": 0.15857284440039643, + "grad_norm": 0.7245749831199646, + "learning_rate": 9.900278993766668e-07, + "logits/chosen": -0.3451727330684662, + "logits/rejected": -0.3348972201347351, + "logps/chosen": -0.8788102865219116, + "logps/rejected": -0.9273182153701782, + "loss": 1.377, + "rewards/accuracies": 0.5078125, + "rewards/chosen": -1.7576205730438232, + "rewards/margins": 0.09701582789421082, + "rewards/rejected": -1.8546364307403564, + "step": 75 + }, + { + "epoch": 0.16068714899240172, + "grad_norm": 0.3135383725166321, + "learning_rate": 9.89278246149752e-07, + "logits/chosen": -0.4140404760837555, + "logits/rejected": -0.38082340359687805, + "logps/chosen": -0.7513999342918396, + "logps/rejected": -0.7880118489265442, + "loss": 1.3921, + "rewards/accuracies": 0.53125, + "rewards/chosen": -1.5027998685836792, + "rewards/margins": 0.07322371751070023, + "rewards/rejected": -1.5760236978530884, + "step": 76 + }, + { + "epoch": 0.162801453584407, + "grad_norm": 0.8594076633453369, + "learning_rate": 9.885017319367252e-07, + "logits/chosen": -0.35951656103134155, + "logits/rejected": -0.30456626415252686, + "logps/chosen": -0.7989844679832458, + "logps/rejected": -0.8145395517349243, + "loss": 1.4061, + "rewards/accuracies": 0.4765625, + "rewards/chosen": -1.5979689359664917, + "rewards/margins": 0.031110182404518127, + "rewards/rejected": -1.6290791034698486, + "step": 77 + }, + { + "epoch": 0.1649157581764123, + "grad_norm": 0.4393538534641266, + "learning_rate": 9.876983993675989e-07, + "logits/chosen": -0.33119240403175354, + "logits/rejected": -0.31970253586769104, + "logps/chosen": -0.721772313117981, + "logps/rejected": -0.7868390679359436, + "loss": 1.3325, + "rewards/accuracies": 0.5390625, + "rewards/chosen": -1.443544626235962, + "rewards/margins": 0.13013358414173126, + "rewards/rejected": -1.5736781358718872, + "step": 78 + }, + { + "epoch": 0.1670300627684176, + "grad_norm": 0.8017925024032593, + "learning_rate": 9.868682925446909e-07, + "logits/chosen": -0.3608989417552948, + "logits/rejected": -0.3497124910354614, + "logps/chosen": -0.8117240071296692, + "logps/rejected": -0.8506529331207275, + "loss": 1.3671, + "rewards/accuracies": 0.53125, + "rewards/chosen": -1.6234480142593384, + "rewards/margins": 0.07785768806934357, + "rewards/rejected": -1.701305866241455, + "step": 79 + }, + { + "epoch": 0.16914436736042285, + "grad_norm": 0.5704994201660156, + "learning_rate": 9.860114570402054e-07, + "logits/chosen": -0.37441548705101013, + "logits/rejected": -0.32188406586647034, + "logps/chosen": -0.745419442653656, + "logps/rejected": -0.816170871257782, + "loss": 1.3315, + "rewards/accuracies": 0.484375, + "rewards/chosen": -1.490838885307312, + "rewards/margins": 0.1415030062198639, + "rewards/rejected": -1.632341742515564, + "step": 80 + }, + { + "epoch": 0.17125867195242814, + "grad_norm": 3.0005106925964355, + "learning_rate": 9.85127939893729e-07, + "logits/chosen": -0.34791454672813416, + "logits/rejected": -0.32542383670806885, + "logps/chosen": -0.7547991275787354, + "logps/rejected": -0.7868378162384033, + "loss": 1.3742, + "rewards/accuracies": 0.515625, + "rewards/chosen": -1.5095982551574707, + "rewards/margins": 0.06407731771469116, + "rewards/rejected": -1.5736756324768066, + "step": 81 + }, + { + "epoch": 0.17337297654443343, + "grad_norm": 0.5891271829605103, + "learning_rate": 9.842177896096493e-07, + "logits/chosen": -0.38649702072143555, + "logits/rejected": -0.36892226338386536, + "logps/chosen": -0.7556143999099731, + "logps/rejected": -0.82858806848526, + "loss": 1.3233, + "rewards/accuracies": 0.5703125, + "rewards/chosen": -1.5112287998199463, + "rewards/margins": 0.14594702422618866, + "rewards/rejected": -1.65717613697052, + "step": 82 + }, + { + "epoch": 0.17548728113643872, + "grad_norm": 0.27861157059669495, + "learning_rate": 9.832810561544923e-07, + "logits/chosen": -0.38264670968055725, + "logits/rejected": -0.35908499360084534, + "logps/chosen": -0.7858557105064392, + "logps/rejected": -0.8571599721908569, + "loss": 1.3234, + "rewards/accuracies": 0.578125, + "rewards/chosen": -1.5717114210128784, + "rewards/margins": 0.1426086127758026, + "rewards/rejected": -1.7143199443817139, + "step": 83 + }, + { + "epoch": 0.177601585728444, + "grad_norm": 0.8158763647079468, + "learning_rate": 9.823177909541793e-07, + "logits/chosen": -0.4076104760169983, + "logits/rejected": -0.3934200704097748, + "logps/chosen": -0.8089872002601624, + "logps/rejected": -0.8885407447814941, + "loss": 1.3476, + "rewards/accuracies": 0.5390625, + "rewards/chosen": -1.6179744005203247, + "rewards/margins": 0.15910708904266357, + "rewards/rejected": -1.7770814895629883, + "step": 84 + }, + { + "epoch": 0.1797158903204493, + "grad_norm": 0.46008333563804626, + "learning_rate": 9.813280468912022e-07, + "logits/chosen": -0.33124151825904846, + "logits/rejected": -0.34535717964172363, + "logps/chosen": -0.733020544052124, + "logps/rejected": -0.8716557621955872, + "loss": 1.2807, + "rewards/accuracies": 0.5859375, + "rewards/chosen": -1.466041088104248, + "rewards/margins": 0.27727028727531433, + "rewards/rejected": -1.7433115243911743, + "step": 85 + }, + { + "epoch": 0.18183019491245458, + "grad_norm": 0.3784334659576416, + "learning_rate": 9.80311878301722e-07, + "logits/chosen": -0.40713849663734436, + "logits/rejected": -0.3808574080467224, + "logps/chosen": -0.7063947319984436, + "logps/rejected": -0.7589148879051208, + "loss": 1.3501, + "rewards/accuracies": 0.515625, + "rewards/chosen": -1.4127894639968872, + "rewards/margins": 0.10504024475812912, + "rewards/rejected": -1.5178297758102417, + "step": 86 + }, + { + "epoch": 0.18394449950445987, + "grad_norm": 0.6137813329696655, + "learning_rate": 9.792693409725853e-07, + "logits/chosen": -0.4119255542755127, + "logits/rejected": -0.44221603870391846, + "logps/chosen": -0.795850932598114, + "logps/rejected": -0.8925026059150696, + "loss": 1.2987, + "rewards/accuracies": 0.5859375, + "rewards/chosen": -1.591701865196228, + "rewards/margins": 0.19330324232578278, + "rewards/rejected": -1.7850052118301392, + "step": 87 + }, + { + "epoch": 0.18605880409646514, + "grad_norm": 0.5354250073432922, + "learning_rate": 9.78200492138261e-07, + "logits/chosen": -0.3792279064655304, + "logits/rejected": -0.3789527714252472, + "logps/chosen": -0.7249161005020142, + "logps/rejected": -0.8088154792785645, + "loss": 1.3304, + "rewards/accuracies": 0.515625, + "rewards/chosen": -1.4498322010040283, + "rewards/margins": 0.16779886186122894, + "rewards/rejected": -1.617630958557129, + "step": 88 + }, + { + "epoch": 0.18817310868847043, + "grad_norm": 0.6911243796348572, + "learning_rate": 9.771053904776995e-07, + "logits/chosen": -0.38837429881095886, + "logits/rejected": -0.36597418785095215, + "logps/chosen": -0.7528612017631531, + "logps/rejected": -0.7981135249137878, + "loss": 1.3481, + "rewards/accuracies": 0.6171875, + "rewards/chosen": -1.5057224035263062, + "rewards/margins": 0.09050464630126953, + "rewards/rejected": -1.5962270498275757, + "step": 89 + }, + { + "epoch": 0.19028741328047571, + "grad_norm": 0.37110790610313416, + "learning_rate": 9.759840961111097e-07, + "logits/chosen": -0.3804919421672821, + "logits/rejected": -0.38750600814819336, + "logps/chosen": -0.8673248291015625, + "logps/rejected": -0.9381619691848755, + "loss": 1.3303, + "rewards/accuracies": 0.5859375, + "rewards/chosen": -1.734649658203125, + "rewards/margins": 0.14167429506778717, + "rewards/rejected": -1.876323938369751, + "step": 90 + }, + { + "epoch": 0.192401717872481, + "grad_norm": 0.8033086657524109, + "learning_rate": 9.748366705966593e-07, + "logits/chosen": -0.3804866075515747, + "logits/rejected": -0.31055447459220886, + "logps/chosen": -0.7535511255264282, + "logps/rejected": -0.7824290990829468, + "loss": 1.3706, + "rewards/accuracies": 0.546875, + "rewards/chosen": -1.5071022510528564, + "rewards/margins": 0.057755980640649796, + "rewards/rejected": -1.5648581981658936, + "step": 91 + }, + { + "epoch": 0.1945160224644863, + "grad_norm": 0.6934167742729187, + "learning_rate": 9.736631769270957e-07, + "logits/chosen": -0.443461149930954, + "logits/rejected": -0.4398806691169739, + "logps/chosen": -0.8123858571052551, + "logps/rejected": -0.8972252607345581, + "loss": 1.3464, + "rewards/accuracies": 0.578125, + "rewards/chosen": -1.6247717142105103, + "rewards/margins": 0.16967862844467163, + "rewards/rejected": -1.7944505214691162, + "step": 92 + }, + { + "epoch": 0.19663032705649158, + "grad_norm": 1.140067458152771, + "learning_rate": 9.724636795262866e-07, + "logits/chosen": -0.43793433904647827, + "logits/rejected": -0.4402340352535248, + "logps/chosen": -0.8155819177627563, + "logps/rejected": -0.8659977912902832, + "loss": 1.3621, + "rewards/accuracies": 0.5, + "rewards/chosen": -1.6311638355255127, + "rewards/margins": 0.10083187371492386, + "rewards/rejected": -1.7319955825805664, + "step": 93 + }, + { + "epoch": 0.19874463164849687, + "grad_norm": 1.8303897380828857, + "learning_rate": 9.712382442456844e-07, + "logits/chosen": -0.34288379549980164, + "logits/rejected": -0.36632782220840454, + "logps/chosen": -0.7338054776191711, + "logps/rejected": -0.8537961840629578, + "loss": 1.2942, + "rewards/accuracies": 0.5859375, + "rewards/chosen": -1.4676109552383423, + "rewards/margins": 0.23998141288757324, + "rewards/rejected": -1.7075923681259155, + "step": 94 + }, + { + "epoch": 0.20085893624050213, + "grad_norm": 0.34392252564430237, + "learning_rate": 9.6998693836071e-07, + "logits/chosen": -0.4381723999977112, + "logits/rejected": -0.4031081199645996, + "logps/chosen": -0.7130292057991028, + "logps/rejected": -0.7402217388153076, + "loss": 1.3766, + "rewards/accuracies": 0.5859375, + "rewards/chosen": -1.4260584115982056, + "rewards/margins": 0.0543849840760231, + "rewards/rejected": -1.4804434776306152, + "step": 95 + }, + { + "epoch": 0.20297324083250742, + "grad_norm": 0.4129842519760132, + "learning_rate": 9.687098305670604e-07, + "logits/chosen": -0.39796924591064453, + "logits/rejected": -0.3476859927177429, + "logps/chosen": -0.7520885467529297, + "logps/rejected": -0.8058558702468872, + "loss": 1.3663, + "rewards/accuracies": 0.53125, + "rewards/chosen": -1.5041770935058594, + "rewards/margins": 0.10753461718559265, + "rewards/rejected": -1.6117117404937744, + "step": 96 + }, + { + "epoch": 0.2050875454245127, + "grad_norm": 0.5054985284805298, + "learning_rate": 9.674069909769362e-07, + "logits/chosen": -0.3942393660545349, + "logits/rejected": -0.3627544045448303, + "logps/chosen": -0.733702540397644, + "logps/rejected": -0.781308650970459, + "loss": 1.342, + "rewards/accuracies": 0.5859375, + "rewards/chosen": -1.467405080795288, + "rewards/margins": 0.09521210938692093, + "rewards/rejected": -1.562617301940918, + "step": 97 + }, + { + "epoch": 0.207201850016518, + "grad_norm": 0.6975870728492737, + "learning_rate": 9.66078491115194e-07, + "logits/chosen": -0.38557127118110657, + "logits/rejected": -0.3581204414367676, + "logps/chosen": -0.7359838485717773, + "logps/rejected": -0.7648134827613831, + "loss": 1.3841, + "rewards/accuracies": 0.5234375, + "rewards/chosen": -1.4719676971435547, + "rewards/margins": 0.057659298181533813, + "rewards/rejected": -1.5296269655227661, + "step": 98 + }, + { + "epoch": 0.2093161546085233, + "grad_norm": 0.7575029730796814, + "learning_rate": 9.647244039154177e-07, + "logits/chosen": -0.3871467411518097, + "logits/rejected": -0.3941374123096466, + "logps/chosen": -0.6516871452331543, + "logps/rejected": -0.7066073417663574, + "loss": 1.3364, + "rewards/accuracies": 0.5390625, + "rewards/chosen": -1.3033742904663086, + "rewards/margins": 0.10984040796756744, + "rewards/rejected": -1.4132146835327148, + "step": 99 + }, + { + "epoch": 0.21143045920052858, + "grad_norm": 1.3344250917434692, + "learning_rate": 9.633448037159166e-07, + "logits/chosen": -0.40887755155563354, + "logits/rejected": -0.41733911633491516, + "logps/chosen": -0.6978950500488281, + "logps/rejected": -0.793424129486084, + "loss": 1.3076, + "rewards/accuracies": 0.609375, + "rewards/chosen": -1.3957901000976562, + "rewards/margins": 0.1910584717988968, + "rewards/rejected": -1.586848258972168, + "step": 100 + }, + { + "epoch": 0.21354476379253387, + "grad_norm": 0.8798456788063049, + "learning_rate": 9.619397662556433e-07, + "logits/chosen": -0.302534282207489, + "logits/rejected": -0.29954588413238525, + "logps/chosen": -0.719552755355835, + "logps/rejected": -0.7628123164176941, + "loss": 1.3699, + "rewards/accuracies": 0.5390625, + "rewards/chosen": -1.43910551071167, + "rewards/margins": 0.08651915192604065, + "rewards/rejected": -1.5256246328353882, + "step": 101 + }, + { + "epoch": 0.21565906838453916, + "grad_norm": 0.8746365308761597, + "learning_rate": 9.605093686700353e-07, + "logits/chosen": -0.372263640165329, + "logits/rejected": -0.3714321255683899, + "logps/chosen": -0.6665956974029541, + "logps/rejected": -0.7361368536949158, + "loss": 1.3173, + "rewards/accuracies": 0.5546875, + "rewards/chosen": -1.3331913948059082, + "rewards/margins": 0.1390824019908905, + "rewards/rejected": -1.4722737073898315, + "step": 102 + }, + { + "epoch": 0.21777337297654442, + "grad_norm": 0.8450930714607239, + "learning_rate": 9.590536894867812e-07, + "logits/chosen": -0.37228280305862427, + "logits/rejected": -0.37763556838035583, + "logps/chosen": -0.7425979375839233, + "logps/rejected": -0.7557005882263184, + "loss": 1.4085, + "rewards/accuracies": 0.5390625, + "rewards/chosen": -1.4851958751678467, + "rewards/margins": 0.026205357164144516, + "rewards/rejected": -1.5114011764526367, + "step": 103 + }, + { + "epoch": 0.2198876775685497, + "grad_norm": 0.5075035691261292, + "learning_rate": 9.575728086215091e-07, + "logits/chosen": -0.4433964788913727, + "logits/rejected": -0.3782787024974823, + "logps/chosen": -0.7308244109153748, + "logps/rejected": -0.8043883442878723, + "loss": 1.3353, + "rewards/accuracies": 0.5703125, + "rewards/chosen": -1.4616488218307495, + "rewards/margins": 0.14712783694267273, + "rewards/rejected": -1.6087766885757446, + "step": 104 + }, + { + "epoch": 0.222001982160555, + "grad_norm": 1.0270946025848389, + "learning_rate": 9.560668073733993e-07, + "logits/chosen": -0.3593980073928833, + "logits/rejected": -0.3159312903881073, + "logps/chosen": -0.757469892501831, + "logps/rejected": -0.8256179094314575, + "loss": 1.3289, + "rewards/accuracies": 0.5546875, + "rewards/chosen": -1.514939785003662, + "rewards/margins": 0.13629598915576935, + "rewards/rejected": -1.651235818862915, + "step": 105 + }, + { + "epoch": 0.2241162867525603, + "grad_norm": 0.8390078544616699, + "learning_rate": 9.54535768420721e-07, + "logits/chosen": -0.3266332149505615, + "logits/rejected": -0.3008713722229004, + "logps/chosen": -0.7286102771759033, + "logps/rejected": -0.7803273797035217, + "loss": 1.3593, + "rewards/accuracies": 0.5078125, + "rewards/chosen": -1.4572205543518066, + "rewards/margins": 0.10343428701162338, + "rewards/rejected": -1.5606547594070435, + "step": 106 + }, + { + "epoch": 0.22623059134456558, + "grad_norm": 1.130595088005066, + "learning_rate": 9.529797758162934e-07, + "logits/chosen": -0.36109817028045654, + "logits/rejected": -0.34797021746635437, + "logps/chosen": -0.7723361253738403, + "logps/rejected": -0.8873662352561951, + "loss": 1.2956, + "rewards/accuracies": 0.625, + "rewards/chosen": -1.5446722507476807, + "rewards/margins": 0.23006024956703186, + "rewards/rejected": -1.7747324705123901, + "step": 107 + }, + { + "epoch": 0.22834489593657087, + "grad_norm": 0.9399718642234802, + "learning_rate": 9.513989149828717e-07, + "logits/chosen": -0.3596777021884918, + "logits/rejected": -0.3660539388656616, + "logps/chosen": -0.7130635976791382, + "logps/rejected": -0.7378955483436584, + "loss": 1.3774, + "rewards/accuracies": 0.4765625, + "rewards/chosen": -1.4261271953582764, + "rewards/margins": 0.04966379329562187, + "rewards/rejected": -1.475791096687317, + "step": 108 + }, + { + "epoch": 0.23045920052857616, + "grad_norm": 1.097594976425171, + "learning_rate": 9.49793272708457e-07, + "logits/chosen": -0.31783169507980347, + "logits/rejected": -0.3008044362068176, + "logps/chosen": -0.6933202147483826, + "logps/rejected": -0.7510000467300415, + "loss": 1.3498, + "rewards/accuracies": 0.5390625, + "rewards/chosen": -1.3866404294967651, + "rewards/margins": 0.1153596043586731, + "rewards/rejected": -1.502000093460083, + "step": 109 + }, + { + "epoch": 0.23257350512058145, + "grad_norm": 1.096330165863037, + "learning_rate": 9.481629371415313e-07, + "logits/chosen": -0.3582899570465088, + "logits/rejected": -0.3120020031929016, + "logps/chosen": -0.817268431186676, + "logps/rejected": -0.8862374424934387, + "loss": 1.3349, + "rewards/accuracies": 0.5625, + "rewards/chosen": -1.634536862373352, + "rewards/margins": 0.13793781399726868, + "rewards/rejected": -1.7724748849868774, + "step": 110 + }, + { + "epoch": 0.2346878097125867, + "grad_norm": 0.8261978626251221, + "learning_rate": 9.465079977862192e-07, + "logits/chosen": -0.41336673498153687, + "logits/rejected": -0.39544352889060974, + "logps/chosen": -0.7673372030258179, + "logps/rejected": -0.8331737518310547, + "loss": 1.3373, + "rewards/accuracies": 0.59375, + "rewards/chosen": -1.5346744060516357, + "rewards/margins": 0.1316729635000229, + "rewards/rejected": -1.6663475036621094, + "step": 111 + }, + { + "epoch": 0.236802114304592, + "grad_norm": 0.5922806262969971, + "learning_rate": 9.448285454973737e-07, + "logits/chosen": -0.3224758207798004, + "logits/rejected": -0.3118049204349518, + "logps/chosen": -0.7584627866744995, + "logps/rejected": -0.8859898447990417, + "loss": 1.2731, + "rewards/accuracies": 0.625, + "rewards/chosen": -1.516925573348999, + "rewards/margins": 0.2550540566444397, + "rewards/rejected": -1.7719796895980835, + "step": 112 + }, + { + "epoch": 0.2389164188965973, + "grad_norm": 1.3172541856765747, + "learning_rate": 9.431246724755877e-07, + "logits/chosen": -0.4287208318710327, + "logits/rejected": -0.3984590172767639, + "logps/chosen": -0.7587048411369324, + "logps/rejected": -0.7860502004623413, + "loss": 1.3832, + "rewards/accuracies": 0.4765625, + "rewards/chosen": -1.5174096822738647, + "rewards/margins": 0.05469079315662384, + "rewards/rejected": -1.5721004009246826, + "step": 113 + }, + { + "epoch": 0.24103072348860258, + "grad_norm": 0.7749882340431213, + "learning_rate": 9.413964722621337e-07, + "logits/chosen": -0.39085906744003296, + "logits/rejected": -0.3316206932067871, + "logps/chosen": -0.7035898566246033, + "logps/rejected": -0.7375759482383728, + "loss": 1.387, + "rewards/accuracies": 0.5546875, + "rewards/chosen": -1.4071797132492065, + "rewards/margins": 0.06797221302986145, + "rewards/rejected": -1.4751518964767456, + "step": 114 + }, + { + "epoch": 0.24314502808060787, + "grad_norm": 1.0914056301116943, + "learning_rate": 9.396440397338272e-07, + "logits/chosen": -0.38826486468315125, + "logits/rejected": -0.35520774126052856, + "logps/chosen": -0.7385872602462769, + "logps/rejected": -0.7974889278411865, + "loss": 1.3477, + "rewards/accuracies": 0.5, + "rewards/chosen": -1.4771745204925537, + "rewards/margins": 0.11780343949794769, + "rewards/rejected": -1.594977855682373, + "step": 115 + }, + { + "epoch": 0.24525933267261316, + "grad_norm": 1.2966018915176392, + "learning_rate": 9.378674710978183e-07, + "logits/chosen": -0.36493802070617676, + "logits/rejected": -0.34763696789741516, + "logps/chosen": -0.6731826663017273, + "logps/rejected": -0.7645149827003479, + "loss": 1.3, + "rewards/accuracies": 0.5859375, + "rewards/chosen": -1.3463653326034546, + "rewards/margins": 0.18266455829143524, + "rewards/rejected": -1.5290299654006958, + "step": 116 + }, + { + "epoch": 0.24737363726461845, + "grad_norm": 0.49401605129241943, + "learning_rate": 9.360668638863109e-07, + "logits/chosen": -0.40416795015335083, + "logits/rejected": -0.3815993070602417, + "logps/chosen": -0.719497799873352, + "logps/rejected": -0.7588324546813965, + "loss": 1.3621, + "rewards/accuracies": 0.5625, + "rewards/chosen": -1.438995599746704, + "rewards/margins": 0.07866920530796051, + "rewards/rejected": -1.517664909362793, + "step": 117 + }, + { + "epoch": 0.2494879418566237, + "grad_norm": 1.0603238344192505, + "learning_rate": 9.342423169512071e-07, + "logits/chosen": -0.3857055604457855, + "logits/rejected": -0.3524513244628906, + "logps/chosen": -0.7373769283294678, + "logps/rejected": -0.7971038818359375, + "loss": 1.3358, + "rewards/accuracies": 0.59375, + "rewards/chosen": -1.4747538566589355, + "rewards/margins": 0.11945393681526184, + "rewards/rejected": -1.594207763671875, + "step": 118 + }, + { + "epoch": 0.251602246448629, + "grad_norm": 0.9880490303039551, + "learning_rate": 9.323939304586804e-07, + "logits/chosen": -0.31455785036087036, + "logits/rejected": -0.3102484941482544, + "logps/chosen": -0.7276102900505066, + "logps/rejected": -0.7446941137313843, + "loss": 1.3928, + "rewards/accuracies": 0.546875, + "rewards/chosen": -1.4552205801010132, + "rewards/margins": 0.034167706966400146, + "rewards/rejected": -1.4893882274627686, + "step": 119 + }, + { + "epoch": 0.2537165510406343, + "grad_norm": 1.0419566631317139, + "learning_rate": 9.305218058836776e-07, + "logits/chosen": -0.38093918561935425, + "logits/rejected": -0.3588898181915283, + "logps/chosen": -0.715582013130188, + "logps/rejected": -0.8271002769470215, + "loss": 1.2934, + "rewards/accuracies": 0.6015625, + "rewards/chosen": -1.431164026260376, + "rewards/margins": 0.22303667664527893, + "rewards/rejected": -1.654200553894043, + "step": 120 + }, + { + "epoch": 0.2558308556326396, + "grad_norm": 0.657620370388031, + "learning_rate": 9.286260460043473e-07, + "logits/chosen": -0.45690783858299255, + "logits/rejected": -0.4082674980163574, + "logps/chosen": -0.6932571530342102, + "logps/rejected": -0.7631082534790039, + "loss": 1.3398, + "rewards/accuracies": 0.5546875, + "rewards/chosen": -1.3865143060684204, + "rewards/margins": 0.13970226049423218, + "rewards/rejected": -1.5262165069580078, + "step": 121 + }, + { + "epoch": 0.25794516022464486, + "grad_norm": 0.983686089515686, + "learning_rate": 9.267067548963974e-07, + "logits/chosen": -0.40266987681388855, + "logits/rejected": -0.37586671113967896, + "logps/chosen": -0.7362720966339111, + "logps/rejected": -0.7538987398147583, + "loss": 1.4066, + "rewards/accuracies": 0.5390625, + "rewards/chosen": -1.4725441932678223, + "rewards/margins": 0.03525342047214508, + "rewards/rejected": -1.5077974796295166, + "step": 122 + }, + { + "epoch": 0.26005946481665015, + "grad_norm": 1.0076361894607544, + "learning_rate": 9.24764037927381e-07, + "logits/chosen": -0.4461461007595062, + "logits/rejected": -0.40700826048851013, + "logps/chosen": -0.7206646800041199, + "logps/rejected": -0.7489192485809326, + "loss": 1.3759, + "rewards/accuracies": 0.5625, + "rewards/chosen": -1.4413293600082397, + "rewards/margins": 0.05650928616523743, + "rewards/rejected": -1.4978384971618652, + "step": 123 + }, + { + "epoch": 0.26217376940865544, + "grad_norm": 0.933315098285675, + "learning_rate": 9.22798001750913e-07, + "logits/chosen": -0.3966676890850067, + "logits/rejected": -0.3572196960449219, + "logps/chosen": -0.7075096368789673, + "logps/rejected": -0.7406759262084961, + "loss": 1.3667, + "rewards/accuracies": 0.6171875, + "rewards/chosen": -1.4150192737579346, + "rewards/margins": 0.06633266806602478, + "rewards/rejected": -1.4813518524169922, + "step": 124 + }, + { + "epoch": 0.26428807400066073, + "grad_norm": 0.6277392506599426, + "learning_rate": 9.20808754300814e-07, + "logits/chosen": -0.3555490970611572, + "logits/rejected": -0.35786163806915283, + "logps/chosen": -0.7549921274185181, + "logps/rejected": -0.832869291305542, + "loss": 1.3175, + "rewards/accuracies": 0.5546875, + "rewards/chosen": -1.5099842548370361, + "rewards/margins": 0.15575438737869263, + "rewards/rejected": -1.665738582611084, + "step": 125 + }, + { + "epoch": 0.266402378592666, + "grad_norm": 0.7172744274139404, + "learning_rate": 9.18796404785185e-07, + "logits/chosen": -0.41230690479278564, + "logits/rejected": -0.39935630559921265, + "logps/chosen": -0.7129833698272705, + "logps/rejected": -0.7888559103012085, + "loss": 1.3167, + "rewards/accuracies": 0.5625, + "rewards/chosen": -1.425966739654541, + "rewards/margins": 0.15174514055252075, + "rewards/rejected": -1.577711820602417, + "step": 126 + }, + { + "epoch": 0.2685166831846713, + "grad_norm": 1.2629508972167969, + "learning_rate": 9.16761063680412e-07, + "logits/chosen": -0.36754000186920166, + "logits/rejected": -0.3541562259197235, + "logps/chosen": -0.6992133855819702, + "logps/rejected": -0.7668892741203308, + "loss": 1.3735, + "rewards/accuracies": 0.578125, + "rewards/chosen": -1.3984267711639404, + "rewards/margins": 0.13535188138484955, + "rewards/rejected": -1.5337785482406616, + "step": 127 + }, + { + "epoch": 0.2706309877766766, + "grad_norm": 0.7024405598640442, + "learning_rate": 9.147028427251009e-07, + "logits/chosen": -0.4014585018157959, + "logits/rejected": -0.40560898184776306, + "logps/chosen": -0.727234959602356, + "logps/rejected": -0.8070081472396851, + "loss": 1.3138, + "rewards/accuracies": 0.625, + "rewards/chosen": -1.454469919204712, + "rewards/margins": 0.15954652428627014, + "rewards/rejected": -1.6140162944793701, + "step": 128 + }, + { + "epoch": 0.2727452923686819, + "grad_norm": 1.6173532009124756, + "learning_rate": 9.126218549139433e-07, + "logits/chosen": -0.32572367787361145, + "logits/rejected": -0.3470613956451416, + "logps/chosen": -0.7555541396141052, + "logps/rejected": -0.8856738209724426, + "loss": 1.2461, + "rewards/accuracies": 0.6796875, + "rewards/chosen": -1.5111082792282104, + "rewards/margins": 0.26023951172828674, + "rewards/rejected": -1.7713476419448853, + "step": 129 + }, + { + "epoch": 0.2748595969606871, + "grad_norm": 0.5878487229347229, + "learning_rate": 9.105182144915129e-07, + "logits/chosen": -0.39267170429229736, + "logits/rejected": -0.3448992967605591, + "logps/chosen": -0.6776289343833923, + "logps/rejected": -0.7530183792114258, + "loss": 1.3242, + "rewards/accuracies": 0.609375, + "rewards/chosen": -1.3552578687667847, + "rewards/margins": 0.15077897906303406, + "rewards/rejected": -1.5060367584228516, + "step": 130 + }, + { + "epoch": 0.2769739015526924, + "grad_norm": 0.43264809250831604, + "learning_rate": 9.08392036945994e-07, + "logits/chosen": -0.39980950951576233, + "logits/rejected": -0.4247930645942688, + "logps/chosen": -0.7898982167243958, + "logps/rejected": -0.8856299519538879, + "loss": 1.3004, + "rewards/accuracies": 0.59375, + "rewards/chosen": -1.5797964334487915, + "rewards/margins": 0.19146347045898438, + "rewards/rejected": -1.7712599039077759, + "step": 131 + }, + { + "epoch": 0.2790882061446977, + "grad_norm": 1.0348538160324097, + "learning_rate": 9.062434390028407e-07, + "logits/chosen": -0.35729700326919556, + "logits/rejected": -0.3265542984008789, + "logps/chosen": -0.7120587229728699, + "logps/rejected": -0.771691083908081, + "loss": 1.3374, + "rewards/accuracies": 0.59375, + "rewards/chosen": -1.4241174459457397, + "rewards/margins": 0.11926469206809998, + "rewards/rejected": -1.543382167816162, + "step": 132 + }, + { + "epoch": 0.281202510736703, + "grad_norm": 2.0902225971221924, + "learning_rate": 9.04072538618369e-07, + "logits/chosen": -0.4942469298839569, + "logits/rejected": -0.48699846863746643, + "logps/chosen": -0.7882512211799622, + "logps/rejected": -0.8270165920257568, + "loss": 1.3715, + "rewards/accuracies": 0.5703125, + "rewards/chosen": -1.5765024423599243, + "rewards/margins": 0.07753071188926697, + "rewards/rejected": -1.6540331840515137, + "step": 133 + }, + { + "epoch": 0.2833168153287083, + "grad_norm": 1.6436113119125366, + "learning_rate": 9.018794549732817e-07, + "logits/chosen": -0.41133156418800354, + "logits/rejected": -0.4146718382835388, + "logps/chosen": -0.779824435710907, + "logps/rejected": -0.9421006441116333, + "loss": 1.2521, + "rewards/accuracies": 0.625, + "rewards/chosen": -1.559648871421814, + "rewards/margins": 0.324552446603775, + "rewards/rejected": -1.8842012882232666, + "step": 134 + }, + { + "epoch": 0.28543111992071357, + "grad_norm": 0.8831859827041626, + "learning_rate": 8.996643084661244e-07, + "logits/chosen": -0.42452165484428406, + "logits/rejected": -0.3798604905605316, + "logps/chosen": -0.6499216556549072, + "logps/rejected": -0.7796702980995178, + "loss": 1.2581, + "rewards/accuracies": 0.625, + "rewards/chosen": -1.2998433113098145, + "rewards/margins": 0.25949734449386597, + "rewards/rejected": -1.5593405961990356, + "step": 135 + }, + { + "epoch": 0.28754542451271886, + "grad_norm": 0.8031218647956848, + "learning_rate": 8.974272207066767e-07, + "logits/chosen": -0.38131940364837646, + "logits/rejected": -0.3854255676269531, + "logps/chosen": -0.7026851773262024, + "logps/rejected": -0.762391209602356, + "loss": 1.3333, + "rewards/accuracies": 0.5625, + "rewards/chosen": -1.4053703546524048, + "rewards/margins": 0.11941206455230713, + "rewards/rejected": -1.524782419204712, + "step": 136 + }, + { + "epoch": 0.28965972910472415, + "grad_norm": 1.4455821514129639, + "learning_rate": 8.951683145092748e-07, + "logits/chosen": -0.42824965715408325, + "logits/rejected": -0.4320424795150757, + "logps/chosen": -0.7893270254135132, + "logps/rejected": -0.8517144322395325, + "loss": 1.3652, + "rewards/accuracies": 0.5234375, + "rewards/chosen": -1.5786540508270264, + "rewards/margins": 0.12477481365203857, + "rewards/rejected": -1.703428864479065, + "step": 137 + }, + { + "epoch": 0.29177403369672944, + "grad_norm": 0.6299450397491455, + "learning_rate": 8.928877138860706e-07, + "logits/chosen": -0.4388589560985565, + "logits/rejected": -0.40156903862953186, + "logps/chosen": -0.7346572875976562, + "logps/rejected": -0.8166492581367493, + "loss": 1.3134, + "rewards/accuracies": 0.5703125, + "rewards/chosen": -1.4693145751953125, + "rewards/margins": 0.16398391127586365, + "rewards/rejected": -1.6332985162734985, + "step": 138 + }, + { + "epoch": 0.29388833828873473, + "grad_norm": 2.784437417984009, + "learning_rate": 8.905855440402224e-07, + "logits/chosen": -0.405662477016449, + "logits/rejected": -0.35549795627593994, + "logps/chosen": -0.7482771277427673, + "logps/rejected": -0.795568585395813, + "loss": 1.3656, + "rewards/accuracies": 0.578125, + "rewards/chosen": -1.4965542554855347, + "rewards/margins": 0.09458285570144653, + "rewards/rejected": -1.591137170791626, + "step": 139 + }, + { + "epoch": 0.29600264288074, + "grad_norm": 0.4958692193031311, + "learning_rate": 8.882619313590212e-07, + "logits/chosen": -0.3814452886581421, + "logits/rejected": -0.35715553164482117, + "logps/chosen": -0.7731542587280273, + "logps/rejected": -0.8285202980041504, + "loss": 1.3776, + "rewards/accuracies": 0.515625, + "rewards/chosen": -1.5463085174560547, + "rewards/margins": 0.11073210835456848, + "rewards/rejected": -1.6570405960083008, + "step": 140 + }, + { + "epoch": 0.2981169474727453, + "grad_norm": 0.4597362279891968, + "learning_rate": 8.859170034069532e-07, + "logits/chosen": -0.388383150100708, + "logits/rejected": -0.4071737229824066, + "logps/chosen": -0.7263504266738892, + "logps/rejected": -0.769676148891449, + "loss": 1.3712, + "rewards/accuracies": 0.53125, + "rewards/chosen": -1.4527008533477783, + "rewards/margins": 0.08665145933628082, + "rewards/rejected": -1.539352297782898, + "step": 141 + }, + { + "epoch": 0.3002312520647506, + "grad_norm": 0.4914930760860443, + "learning_rate": 8.835508889186956e-07, + "logits/chosen": -0.41084378957748413, + "logits/rejected": -0.3823031187057495, + "logps/chosen": -0.7565821409225464, + "logps/rejected": -0.9084322452545166, + "loss": 1.2717, + "rewards/accuracies": 0.578125, + "rewards/chosen": -1.5131642818450928, + "rewards/margins": 0.3037002384662628, + "rewards/rejected": -1.8168644905090332, + "step": 142 + }, + { + "epoch": 0.3023455566567559, + "grad_norm": 2.0075581073760986, + "learning_rate": 8.811637177920499e-07, + "logits/chosen": -0.4438302516937256, + "logits/rejected": -0.4916025698184967, + "logps/chosen": -0.800719141960144, + "logps/rejected": -0.8658267855644226, + "loss": 1.358, + "rewards/accuracies": 0.515625, + "rewards/chosen": -1.601438283920288, + "rewards/margins": 0.1302153617143631, + "rewards/rejected": -1.7316535711288452, + "step": 143 + }, + { + "epoch": 0.3044598612487612, + "grad_norm": 1.1243022680282593, + "learning_rate": 8.7875562108081e-07, + "logits/chosen": -0.40519949793815613, + "logits/rejected": -0.3905750811100006, + "logps/chosen": -0.689585268497467, + "logps/rejected": -0.7312421798706055, + "loss": 1.3503, + "rewards/accuracies": 0.5703125, + "rewards/chosen": -1.379170536994934, + "rewards/margins": 0.08331384509801865, + "rewards/rejected": -1.462484359741211, + "step": 144 + }, + { + "epoch": 0.3065741658407664, + "grad_norm": 0.7543137669563293, + "learning_rate": 8.76326730987568e-07, + "logits/chosen": -0.4696752727031708, + "logits/rejected": -0.4357326626777649, + "logps/chosen": -0.7813425660133362, + "logps/rejected": -0.8276973962783813, + "loss": 1.3794, + "rewards/accuracies": 0.5234375, + "rewards/chosen": -1.5626851320266724, + "rewards/margins": 0.09270970523357391, + "rewards/rejected": -1.6553947925567627, + "step": 145 + }, + { + "epoch": 0.3086884704327717, + "grad_norm": 1.3136053085327148, + "learning_rate": 8.738771808564555e-07, + "logits/chosen": -0.4262731075286865, + "logits/rejected": -0.44038820266723633, + "logps/chosen": -0.697494387626648, + "logps/rejected": -0.8369535803794861, + "loss": 1.2699, + "rewards/accuracies": 0.609375, + "rewards/chosen": -1.394988775253296, + "rewards/margins": 0.2789183557033539, + "rewards/rejected": -1.6739071607589722, + "step": 146 + }, + { + "epoch": 0.310802775024777, + "grad_norm": 2.221562385559082, + "learning_rate": 8.714071051658245e-07, + "logits/chosen": -0.40089336037635803, + "logits/rejected": -0.37991875410079956, + "logps/chosen": -0.7704445123672485, + "logps/rejected": -0.859091579914093, + "loss": 1.2987, + "rewards/accuracies": 0.5859375, + "rewards/chosen": -1.540889024734497, + "rewards/margins": 0.17729414999485016, + "rewards/rejected": -1.718183159828186, + "step": 147 + }, + { + "epoch": 0.3129170796167823, + "grad_norm": 1.5049912929534912, + "learning_rate": 8.689166395208636e-07, + "logits/chosen": -0.38984015583992004, + "logits/rejected": -0.35900723934173584, + "logps/chosen": -0.6424779891967773, + "logps/rejected": -0.7145389318466187, + "loss": 1.3261, + "rewards/accuracies": 0.5703125, + "rewards/chosen": -1.2849559783935547, + "rewards/margins": 0.14412200450897217, + "rewards/rejected": -1.4290778636932373, + "step": 148 + }, + { + "epoch": 0.31503138420878757, + "grad_norm": 0.36125388741493225, + "learning_rate": 8.664059206461534e-07, + "logits/chosen": -0.3490441143512726, + "logits/rejected": -0.3219914436340332, + "logps/chosen": -0.7200264930725098, + "logps/rejected": -0.7924249768257141, + "loss": 1.3476, + "rewards/accuracies": 0.5625, + "rewards/chosen": -1.4400529861450195, + "rewards/margins": 0.1447969526052475, + "rewards/rejected": -1.5848499536514282, + "step": 149 + }, + { + "epoch": 0.31714568880079286, + "grad_norm": 1.039840579032898, + "learning_rate": 8.638750863781612e-07, + "logits/chosen": -0.40701645612716675, + "logits/rejected": -0.406186580657959, + "logps/chosen": -0.7083575129508972, + "logps/rejected": -0.7766748070716858, + "loss": 1.3263, + "rewards/accuracies": 0.5390625, + "rewards/chosen": -1.4167150259017944, + "rewards/margins": 0.1366347074508667, + "rewards/rejected": -1.5533496141433716, + "step": 150 + }, + { + "epoch": 0.31925999339279815, + "grad_norm": 0.7128564119338989, + "learning_rate": 8.613242756576728e-07, + "logits/chosen": -0.40932926535606384, + "logits/rejected": -0.4234562814235687, + "logps/chosen": -0.6775843501091003, + "logps/rejected": -0.7866222858428955, + "loss": 1.2834, + "rewards/accuracies": 0.6015625, + "rewards/chosen": -1.3551687002182007, + "rewards/margins": 0.2180757373571396, + "rewards/rejected": -1.573244571685791, + "step": 151 + }, + { + "epoch": 0.32137429798480344, + "grad_norm": 1.1701059341430664, + "learning_rate": 8.587536285221655e-07, + "logits/chosen": -0.3654797077178955, + "logits/rejected": -0.3181680738925934, + "logps/chosen": -0.6686022877693176, + "logps/rejected": -0.7058504223823547, + "loss": 1.3612, + "rewards/accuracies": 0.546875, + "rewards/chosen": -1.3372045755386353, + "rewards/margins": 0.07449636608362198, + "rewards/rejected": -1.4117008447647095, + "step": 152 + }, + { + "epoch": 0.3234886025768087, + "grad_norm": 0.8239700794219971, + "learning_rate": 8.561632860981204e-07, + "logits/chosen": -0.42527130246162415, + "logits/rejected": -0.4091627299785614, + "logps/chosen": -0.6969794631004333, + "logps/rejected": -0.8019355535507202, + "loss": 1.2974, + "rewards/accuracies": 0.578125, + "rewards/chosen": -1.3939589262008667, + "rewards/margins": 0.20991206169128418, + "rewards/rejected": -1.6038711071014404, + "step": 153 + }, + { + "epoch": 0.325602907168814, + "grad_norm": 1.4885636568069458, + "learning_rate": 8.535533905932737e-07, + "logits/chosen": -0.4126192331314087, + "logits/rejected": -0.41548141837120056, + "logps/chosen": -0.7076549530029297, + "logps/rejected": -0.7940821051597595, + "loss": 1.3198, + "rewards/accuracies": 0.625, + "rewards/chosen": -1.4153099060058594, + "rewards/margins": 0.17285437881946564, + "rewards/rejected": -1.588164210319519, + "step": 154 + }, + { + "epoch": 0.3277172117608193, + "grad_norm": 1.439434289932251, + "learning_rate": 8.509240852888106e-07, + "logits/chosen": -0.3763914704322815, + "logits/rejected": -0.3617165684700012, + "logps/chosen": -0.7189474105834961, + "logps/rejected": -0.827629804611206, + "loss": 1.2816, + "rewards/accuracies": 0.625, + "rewards/chosen": -1.4378948211669922, + "rewards/margins": 0.2173648476600647, + "rewards/rejected": -1.655259609222412, + "step": 155 + }, + { + "epoch": 0.3298315163528246, + "grad_norm": 1.4505418539047241, + "learning_rate": 8.482755145314985e-07, + "logits/chosen": -0.37879478931427, + "logits/rejected": -0.38689684867858887, + "logps/chosen": -0.7011865973472595, + "logps/rejected": -0.8019431829452515, + "loss": 1.3158, + "rewards/accuracies": 0.5546875, + "rewards/chosen": -1.402373194694519, + "rewards/margins": 0.2015131413936615, + "rewards/rejected": -1.603886365890503, + "step": 156 + }, + { + "epoch": 0.3319458209448299, + "grad_norm": 2.0968713760375977, + "learning_rate": 8.45607823725763e-07, + "logits/chosen": -0.4366365075111389, + "logits/rejected": -0.41210681200027466, + "logps/chosen": -0.6455651521682739, + "logps/rejected": -0.7228428721427917, + "loss": 1.3247, + "rewards/accuracies": 0.5546875, + "rewards/chosen": -1.2911303043365479, + "rewards/margins": 0.1545555144548416, + "rewards/rejected": -1.4456857442855835, + "step": 157 + }, + { + "epoch": 0.3340601255368352, + "grad_norm": 0.6716106534004211, + "learning_rate": 8.429211593257052e-07, + "logits/chosen": -0.42992207407951355, + "logits/rejected": -0.4105672836303711, + "logps/chosen": -0.6981461048126221, + "logps/rejected": -0.7909567952156067, + "loss": 1.3128, + "rewards/accuracies": 0.5859375, + "rewards/chosen": -1.3962922096252441, + "rewards/margins": 0.1856214702129364, + "rewards/rejected": -1.5819135904312134, + "step": 158 + }, + { + "epoch": 0.33617443012884046, + "grad_norm": 2.4430501461029053, + "learning_rate": 8.402156688270612e-07, + "logits/chosen": -0.4184916317462921, + "logits/rejected": -0.3943992257118225, + "logps/chosen": -0.6568948030471802, + "logps/rejected": -0.7506390810012817, + "loss": 1.2992, + "rewards/accuracies": 0.5859375, + "rewards/chosen": -1.3137896060943604, + "rewards/margins": 0.18748846650123596, + "rewards/rejected": -1.5012781620025635, + "step": 159 + }, + { + "epoch": 0.3382887347208457, + "grad_norm": 2.0322091579437256, + "learning_rate": 8.374915007591052e-07, + "logits/chosen": -0.4713057577610016, + "logits/rejected": -0.42163771390914917, + "logps/chosen": -0.7347853779792786, + "logps/rejected": -0.7770044207572937, + "loss": 1.3801, + "rewards/accuracies": 0.5078125, + "rewards/chosen": -1.4695707559585571, + "rewards/margins": 0.0844380110502243, + "rewards/rejected": -1.5540088415145874, + "step": 160 + }, + { + "epoch": 0.340403039312851, + "grad_norm": 0.4045500159263611, + "learning_rate": 8.347488046764948e-07, + "logits/chosen": -0.39465126395225525, + "logits/rejected": -0.3961923122406006, + "logps/chosen": -0.601732075214386, + "logps/rejected": -0.694148600101471, + "loss": 1.2859, + "rewards/accuracies": 0.6328125, + "rewards/chosen": -1.203464150428772, + "rewards/margins": 0.18483319878578186, + "rewards/rejected": -1.388297200202942, + "step": 161 + }, + { + "epoch": 0.3425173439048563, + "grad_norm": 2.79396915435791, + "learning_rate": 8.319877311510612e-07, + "logits/chosen": -0.4311378002166748, + "logits/rejected": -0.4248836636543274, + "logps/chosen": -0.6813413500785828, + "logps/rejected": -0.775830864906311, + "loss": 1.3001, + "rewards/accuracies": 0.578125, + "rewards/chosen": -1.3626827001571655, + "rewards/margins": 0.18897925317287445, + "rewards/rejected": -1.551661729812622, + "step": 162 + }, + { + "epoch": 0.34463164849686156, + "grad_norm": 0.714146077632904, + "learning_rate": 8.292084317635419e-07, + "logits/chosen": -0.4060715436935425, + "logits/rejected": -0.3770482540130615, + "logps/chosen": -0.7176523208618164, + "logps/rejected": -0.7973593473434448, + "loss": 1.324, + "rewards/accuracies": 0.5859375, + "rewards/chosen": -1.4353046417236328, + "rewards/margins": 0.15941408276557922, + "rewards/rejected": -1.5947186946868896, + "step": 163 + }, + { + "epoch": 0.34674595308886685, + "grad_norm": 1.6007037162780762, + "learning_rate": 8.264110590952607e-07, + "logits/chosen": -0.49063974618911743, + "logits/rejected": -0.5119628310203552, + "logps/chosen": -0.7263911366462708, + "logps/rejected": -0.9138184785842896, + "loss": 1.2439, + "rewards/accuracies": 0.6484375, + "rewards/chosen": -1.4527822732925415, + "rewards/margins": 0.3748546540737152, + "rewards/rejected": -1.827636957168579, + "step": 164 + }, + { + "epoch": 0.34886025768087214, + "grad_norm": 1.4566830396652222, + "learning_rate": 8.235957667197494e-07, + "logits/chosen": -0.4681779146194458, + "logits/rejected": -0.46475380659103394, + "logps/chosen": -0.6923782229423523, + "logps/rejected": -0.7901281118392944, + "loss": 1.295, + "rewards/accuracies": 0.5703125, + "rewards/chosen": -1.3847564458847046, + "rewards/margins": 0.19549959897994995, + "rewards/rejected": -1.5802562236785889, + "step": 165 + }, + { + "epoch": 0.35097456227287743, + "grad_norm": 3.0825328826904297, + "learning_rate": 8.207627091943177e-07, + "logits/chosen": -0.4294862151145935, + "logits/rejected": -0.42411237955093384, + "logps/chosen": -0.6851246356964111, + "logps/rejected": -0.7844961881637573, + "loss": 1.2871, + "rewards/accuracies": 0.625, + "rewards/chosen": -1.3702492713928223, + "rewards/margins": 0.19874317944049835, + "rewards/rejected": -1.5689923763275146, + "step": 166 + }, + { + "epoch": 0.3530888668648827, + "grad_norm": 1.0783339738845825, + "learning_rate": 8.179120420515675e-07, + "logits/chosen": -0.4528030455112457, + "logits/rejected": -0.4626815617084503, + "logps/chosen": -0.703376293182373, + "logps/rejected": -0.8752757906913757, + "loss": 1.2193, + "rewards/accuracies": 0.703125, + "rewards/chosen": -1.406752586364746, + "rewards/margins": 0.34379899501800537, + "rewards/rejected": -1.7505515813827515, + "step": 167 + }, + { + "epoch": 0.355203171456888, + "grad_norm": 2.6788036823272705, + "learning_rate": 8.150439217908556e-07, + "logits/chosen": -0.44946759939193726, + "logits/rejected": -0.47430264949798584, + "logps/chosen": -0.751136839389801, + "logps/rejected": -0.874577522277832, + "loss": 1.29, + "rewards/accuracies": 0.578125, + "rewards/chosen": -1.502273678779602, + "rewards/margins": 0.24688144028186798, + "rewards/rejected": -1.749155044555664, + "step": 168 + }, + { + "epoch": 0.3573174760488933, + "grad_norm": 0.9087730050086975, + "learning_rate": 8.121585058696999e-07, + "logits/chosen": -0.47294262051582336, + "logits/rejected": -0.46765226125717163, + "logps/chosen": -0.7291173934936523, + "logps/rejected": -0.7999277114868164, + "loss": 1.3482, + "rewards/accuracies": 0.6015625, + "rewards/chosen": -1.4582347869873047, + "rewards/margins": 0.1416206806898117, + "rewards/rejected": -1.5998554229736328, + "step": 169 + }, + { + "epoch": 0.3594317806408986, + "grad_norm": 3.392674207687378, + "learning_rate": 8.092559526951374e-07, + "logits/chosen": -0.5026620626449585, + "logits/rejected": -0.46620574593544006, + "logps/chosen": -0.746992290019989, + "logps/rejected": -0.8266301155090332, + "loss": 1.3202, + "rewards/accuracies": 0.6328125, + "rewards/chosen": -1.493984580039978, + "rewards/margins": 0.15927578508853912, + "rewards/rejected": -1.6532602310180664, + "step": 170 + }, + { + "epoch": 0.3615460852329039, + "grad_norm": 1.27628755569458, + "learning_rate": 8.063364216150256e-07, + "logits/chosen": -0.5211395025253296, + "logits/rejected": -0.5419963598251343, + "logps/chosen": -0.7919114828109741, + "logps/rejected": -0.8731362223625183, + "loss": 1.3228, + "rewards/accuracies": 0.5703125, + "rewards/chosen": -1.5838229656219482, + "rewards/margins": 0.16244953870773315, + "rewards/rejected": -1.7462724447250366, + "step": 171 + }, + { + "epoch": 0.36366038982490917, + "grad_norm": 0.8269656896591187, + "learning_rate": 8.034000729092967e-07, + "logits/chosen": -0.49545183777809143, + "logits/rejected": -0.4716613292694092, + "logps/chosen": -0.719520092010498, + "logps/rejected": -0.7876347303390503, + "loss": 1.3367, + "rewards/accuracies": 0.5625, + "rewards/chosen": -1.439040184020996, + "rewards/margins": 0.13622930645942688, + "rewards/rejected": -1.5752694606781006, + "step": 172 + }, + { + "epoch": 0.36577469441691446, + "grad_norm": 0.6049383282661438, + "learning_rate": 8.004470677811559e-07, + "logits/chosen": -0.45276379585266113, + "logits/rejected": -0.42617955803871155, + "logps/chosen": -0.7097947597503662, + "logps/rejected": -0.7606989145278931, + "loss": 1.3909, + "rewards/accuracies": 0.5703125, + "rewards/chosen": -1.4195895195007324, + "rewards/margins": 0.10180822014808655, + "rewards/rejected": -1.5213978290557861, + "step": 173 + }, + { + "epoch": 0.36788899900891975, + "grad_norm": 3.980013847351074, + "learning_rate": 7.974775683482337e-07, + "logits/chosen": -0.4783569574356079, + "logits/rejected": -0.43521156907081604, + "logps/chosen": -0.7623491287231445, + "logps/rejected": -0.8719285130500793, + "loss": 1.2838, + "rewards/accuracies": 0.5859375, + "rewards/chosen": -1.524698257446289, + "rewards/margins": 0.2191585898399353, + "rewards/rejected": -1.7438570261001587, + "step": 174 + }, + { + "epoch": 0.370003303600925, + "grad_norm": 1.024530053138733, + "learning_rate": 7.94491737633684e-07, + "logits/chosen": -0.5009916424751282, + "logits/rejected": -0.48874592781066895, + "logps/chosen": -0.7552992701530457, + "logps/rejected": -0.8485872745513916, + "loss": 1.3153, + "rewards/accuracies": 0.6015625, + "rewards/chosen": -1.5105985403060913, + "rewards/margins": 0.18657605350017548, + "rewards/rejected": -1.6971745491027832, + "step": 175 + }, + { + "epoch": 0.37211760819293027, + "grad_norm": 1.5952919721603394, + "learning_rate": 7.91489739557236e-07, + "logits/chosen": -0.4424138069152832, + "logits/rejected": -0.4334307312965393, + "logps/chosen": -0.6956002116203308, + "logps/rejected": -0.8018803000450134, + "loss": 1.3011, + "rewards/accuracies": 0.59375, + "rewards/chosen": -1.3912004232406616, + "rewards/margins": 0.21256020665168762, + "rewards/rejected": -1.6037606000900269, + "step": 176 + }, + { + "epoch": 0.37423191278493556, + "grad_norm": 1.8331164121627808, + "learning_rate": 7.884717389261934e-07, + "logits/chosen": -0.4836267828941345, + "logits/rejected": -0.5018677115440369, + "logps/chosen": -0.7895969152450562, + "logps/rejected": -0.927432656288147, + "loss": 1.2467, + "rewards/accuracies": 0.625, + "rewards/chosen": -1.5791938304901123, + "rewards/margins": 0.27567166090011597, + "rewards/rejected": -1.854865312576294, + "step": 177 + }, + { + "epoch": 0.37634621737694085, + "grad_norm": 2.165984869003296, + "learning_rate": 7.854379014263876e-07, + "logits/chosen": -0.46125832200050354, + "logits/rejected": -0.39802712202072144, + "logps/chosen": -0.8382925391197205, + "logps/rejected": -0.9422982931137085, + "loss": 1.339, + "rewards/accuracies": 0.5546875, + "rewards/chosen": -1.676585078239441, + "rewards/margins": 0.20801125466823578, + "rewards/rejected": -1.884596586227417, + "step": 178 + }, + { + "epoch": 0.37846052196894614, + "grad_norm": 0.522197425365448, + "learning_rate": 7.823883936130817e-07, + "logits/chosen": -0.4747823476791382, + "logits/rejected": -0.4888593554496765, + "logps/chosen": -0.723059892654419, + "logps/rejected": -0.84626305103302, + "loss": 1.2708, + "rewards/accuracies": 0.6171875, + "rewards/chosen": -1.446119785308838, + "rewards/margins": 0.24640652537345886, + "rewards/rejected": -1.69252610206604, + "step": 179 + }, + { + "epoch": 0.38057482656095143, + "grad_norm": 1.9690748453140259, + "learning_rate": 7.793233829018262e-07, + "logits/chosen": -0.5430271625518799, + "logits/rejected": -0.5403288006782532, + "logps/chosen": -0.8244275450706482, + "logps/rejected": -0.9133931994438171, + "loss": 1.3306, + "rewards/accuracies": 0.5, + "rewards/chosen": -1.6488550901412964, + "rewards/margins": 0.17793115973472595, + "rewards/rejected": -1.8267863988876343, + "step": 180 + }, + { + "epoch": 0.3826891311529567, + "grad_norm": 2.9181363582611084, + "learning_rate": 7.762430375592688e-07, + "logits/chosen": -0.4843495786190033, + "logits/rejected": -0.47929176688194275, + "logps/chosen": -0.8097372055053711, + "logps/rejected": -0.8973760008811951, + "loss": 1.3283, + "rewards/accuracies": 0.5625, + "rewards/chosen": -1.6194744110107422, + "rewards/margins": 0.17527759075164795, + "rewards/rejected": -1.7947520017623901, + "step": 181 + }, + { + "epoch": 0.384803435744962, + "grad_norm": 4.227083683013916, + "learning_rate": 7.731475266939158e-07, + "logits/chosen": -0.5047686696052551, + "logits/rejected": -0.4921850264072418, + "logps/chosen": -0.875984787940979, + "logps/rejected": -1.0406755208969116, + "loss": 1.3169, + "rewards/accuracies": 0.5703125, + "rewards/chosen": -1.751969575881958, + "rewards/margins": 0.32938146591186523, + "rewards/rejected": -2.0813510417938232, + "step": 182 + }, + { + "epoch": 0.3869177403369673, + "grad_norm": 1.2871490716934204, + "learning_rate": 7.700370202468489e-07, + "logits/chosen": -0.5123783349990845, + "logits/rejected": -0.55179762840271, + "logps/chosen": -0.8869211077690125, + "logps/rejected": -1.1082773208618164, + "loss": 1.216, + "rewards/accuracies": 0.640625, + "rewards/chosen": -1.773842215538025, + "rewards/margins": 0.4427123963832855, + "rewards/rejected": -2.216554641723633, + "step": 183 + }, + { + "epoch": 0.3890320449289726, + "grad_norm": 1.3015679121017456, + "learning_rate": 7.669116889823954e-07, + "logits/chosen": -0.49182361364364624, + "logits/rejected": -0.5180585384368896, + "logps/chosen": -0.8816227912902832, + "logps/rejected": -0.9516821503639221, + "loss": 1.3449, + "rewards/accuracies": 0.5078125, + "rewards/chosen": -1.7632455825805664, + "rewards/margins": 0.14011862874031067, + "rewards/rejected": -1.9033643007278442, + "step": 184 + }, + { + "epoch": 0.3911463495209779, + "grad_norm": 4.280956268310547, + "learning_rate": 7.637717044787526e-07, + "logits/chosen": -0.5702117681503296, + "logits/rejected": -0.5475804209709167, + "logps/chosen": -0.9307697415351868, + "logps/rejected": -1.0322346687316895, + "loss": 1.3434, + "rewards/accuracies": 0.5546875, + "rewards/chosen": -1.8615394830703735, + "rewards/margins": 0.20292985439300537, + "rewards/rejected": -2.064469337463379, + "step": 185 + }, + { + "epoch": 0.39326065411298317, + "grad_norm": 1.3511455059051514, + "learning_rate": 7.606172391185699e-07, + "logits/chosen": -0.5466108322143555, + "logits/rejected": -0.551085352897644, + "logps/chosen": -1.0657893419265747, + "logps/rejected": -1.15786612033844, + "loss": 1.3549, + "rewards/accuracies": 0.4609375, + "rewards/chosen": -2.1315786838531494, + "rewards/margins": 0.18415334820747375, + "rewards/rejected": -2.31573224067688, + "step": 186 + }, + { + "epoch": 0.39537495870498846, + "grad_norm": 0.7001176476478577, + "learning_rate": 7.574484660794836e-07, + "logits/chosen": -0.4849010407924652, + "logits/rejected": -0.5057946443557739, + "logps/chosen": -1.0784757137298584, + "logps/rejected": -1.2035218477249146, + "loss": 1.3556, + "rewards/accuracies": 0.4921875, + "rewards/chosen": -2.156951427459717, + "rewards/margins": 0.25009211897850037, + "rewards/rejected": -2.407043695449829, + "step": 187 + }, + { + "epoch": 0.39748926329699374, + "grad_norm": 3.1405649185180664, + "learning_rate": 7.542655593246103e-07, + "logits/chosen": -0.5316596031188965, + "logits/rejected": -0.5658366680145264, + "logps/chosen": -1.0630009174346924, + "logps/rejected": -1.2867177724838257, + "loss": 1.2612, + "rewards/accuracies": 0.5703125, + "rewards/chosen": -2.1260018348693848, + "rewards/margins": 0.447433739900589, + "rewards/rejected": -2.5734355449676514, + "step": 188 + }, + { + "epoch": 0.39960356788899903, + "grad_norm": 2.142986297607422, + "learning_rate": 7.510686935929962e-07, + "logits/chosen": -0.5959028005599976, + "logits/rejected": -0.5836039781570435, + "logps/chosen": -1.111003041267395, + "logps/rejected": -1.1858208179473877, + "loss": 1.3958, + "rewards/accuracies": 0.5390625, + "rewards/chosen": -2.22200608253479, + "rewards/margins": 0.149635449051857, + "rewards/rejected": -2.3716416358947754, + "step": 189 + }, + { + "epoch": 0.40171787248100427, + "grad_norm": 1.9227335453033447, + "learning_rate": 7.478580443900246e-07, + "logits/chosen": -0.607532799243927, + "logits/rejected": -0.6102017760276794, + "logps/chosen": -1.3353261947631836, + "logps/rejected": -1.3975369930267334, + "loss": 1.457, + "rewards/accuracies": 0.5078125, + "rewards/chosen": -2.670652389526367, + "rewards/margins": 0.12442154437303543, + "rewards/rejected": -2.795073986053467, + "step": 190 + }, + { + "epoch": 0.40383217707300956, + "grad_norm": 0.8509105443954468, + "learning_rate": 7.446337879777802e-07, + "logits/chosen": -0.5903070569038391, + "logits/rejected": -0.5728173851966858, + "logps/chosen": -1.27094566822052, + "logps/rejected": -1.3024815320968628, + "loss": 1.4953, + "rewards/accuracies": 0.5, + "rewards/chosen": -2.54189133644104, + "rewards/margins": 0.06307169049978256, + "rewards/rejected": -2.6049630641937256, + "step": 191 + }, + { + "epoch": 0.40594648166501485, + "grad_norm": 1.1561088562011719, + "learning_rate": 7.413961013653725e-07, + "logits/chosen": -0.5578102469444275, + "logits/rejected": -0.5907329320907593, + "logps/chosen": -1.3817013502120972, + "logps/rejected": -1.419295072555542, + "loss": 1.4865, + "rewards/accuracies": 0.515625, + "rewards/chosen": -2.7634027004241943, + "rewards/margins": 0.07518734782934189, + "rewards/rejected": -2.838590145111084, + "step": 192 + }, + { + "epoch": 0.40806078625702014, + "grad_norm": 8.165387153625488, + "learning_rate": 7.381451622992183e-07, + "logits/chosen": -0.5213198661804199, + "logits/rejected": -0.5392848253250122, + "logps/chosen": -1.1798306703567505, + "logps/rejected": -1.2692899703979492, + "loss": 1.3971, + "rewards/accuracies": 0.53125, + "rewards/chosen": -2.359661340713501, + "rewards/margins": 0.17891867458820343, + "rewards/rejected": -2.5385799407958984, + "step": 193 + }, + { + "epoch": 0.4101750908490254, + "grad_norm": 1.2850884199142456, + "learning_rate": 7.348811492532839e-07, + "logits/chosen": -0.5382787585258484, + "logits/rejected": -0.5274642705917358, + "logps/chosen": -1.242587685585022, + "logps/rejected": -1.272438645362854, + "loss": 1.4795, + "rewards/accuracies": 0.4921875, + "rewards/chosen": -2.485175371170044, + "rewards/margins": 0.05970197170972824, + "rewards/rejected": -2.544877290725708, + "step": 194 + }, + { + "epoch": 0.4122893954410307, + "grad_norm": 4.910929203033447, + "learning_rate": 7.316042414192864e-07, + "logits/chosen": -0.6186666488647461, + "logits/rejected": -0.6255884170532227, + "logps/chosen": -1.1743704080581665, + "logps/rejected": -1.2720146179199219, + "loss": 1.4127, + "rewards/accuracies": 0.546875, + "rewards/chosen": -2.348740816116333, + "rewards/margins": 0.19528816640377045, + "rewards/rejected": -2.5440292358398438, + "step": 195 + }, + { + "epoch": 0.414403700033036, + "grad_norm": 4.270901203155518, + "learning_rate": 7.283146186968565e-07, + "logits/chosen": -0.5861366987228394, + "logits/rejected": -0.6005197763442993, + "logps/chosen": -1.2127022743225098, + "logps/rejected": -1.3036490678787231, + "loss": 1.4067, + "rewards/accuracies": 0.546875, + "rewards/chosen": -2.4254045486450195, + "rewards/margins": 0.18189355731010437, + "rewards/rejected": -2.6072981357574463, + "step": 196 + }, + { + "epoch": 0.4165180046250413, + "grad_norm": 0.3070116639137268, + "learning_rate": 7.250124616836622e-07, + "logits/chosen": -0.6026022434234619, + "logits/rejected": -0.5920048952102661, + "logps/chosen": -1.0706496238708496, + "logps/rejected": -1.2879594564437866, + "loss": 1.2465, + "rewards/accuracies": 0.6484375, + "rewards/chosen": -2.141299247741699, + "rewards/margins": 0.4346192479133606, + "rewards/rejected": -2.5759189128875732, + "step": 197 + }, + { + "epoch": 0.4186323092170466, + "grad_norm": 1.160252571105957, + "learning_rate": 7.216979516654943e-07, + "logits/chosen": -0.5808722376823425, + "logits/rejected": -0.5770124197006226, + "logps/chosen": -1.0426011085510254, + "logps/rejected": -1.1295092105865479, + "loss": 1.4244, + "rewards/accuracies": 0.53125, + "rewards/chosen": -2.085202217102051, + "rewards/margins": 0.1738162338733673, + "rewards/rejected": -2.2590184211730957, + "step": 198 + }, + { + "epoch": 0.4207466138090519, + "grad_norm": 4.6966471672058105, + "learning_rate": 7.183712706063132e-07, + "logits/chosen": -0.5958350896835327, + "logits/rejected": -0.6440161466598511, + "logps/chosen": -0.981076717376709, + "logps/rejected": -1.1257147789001465, + "loss": 1.3175, + "rewards/accuracies": 0.6015625, + "rewards/chosen": -1.962153434753418, + "rewards/margins": 0.28927627205848694, + "rewards/rejected": -2.251429557800293, + "step": 199 + }, + { + "epoch": 0.42286091840105716, + "grad_norm": 2.9395248889923096, + "learning_rate": 7.150326011382603e-07, + "logits/chosen": -0.5647889375686646, + "logits/rejected": -0.5762943625450134, + "logps/chosen": -0.8101261854171753, + "logps/rejected": -1.0001438856124878, + "loss": 1.2135, + "rewards/accuracies": 0.6875, + "rewards/chosen": -1.6202523708343506, + "rewards/margins": 0.38003528118133545, + "rewards/rejected": -2.0002877712249756, + "step": 200 + }, + { + "epoch": 0.42497522299306245, + "grad_norm": 1.2575147151947021, + "learning_rate": 7.116821265516306e-07, + "logits/chosen": -0.5834293961524963, + "logits/rejected": -0.5929508805274963, + "logps/chosen": -0.8768399953842163, + "logps/rejected": -1.0942046642303467, + "loss": 1.219, + "rewards/accuracies": 0.6171875, + "rewards/chosen": -1.7536799907684326, + "rewards/margins": 0.43472927808761597, + "rewards/rejected": -2.1884093284606934, + "step": 201 + }, + { + "epoch": 0.42708952758506774, + "grad_norm": 1.4035751819610596, + "learning_rate": 7.083200307848115e-07, + "logits/chosen": -0.5424078106880188, + "logits/rejected": -0.5316082239151001, + "logps/chosen": -0.8791903257369995, + "logps/rejected": -0.9323580265045166, + "loss": 1.3675, + "rewards/accuracies": 0.546875, + "rewards/chosen": -1.758380651473999, + "rewards/margins": 0.10633517056703568, + "rewards/rejected": -1.8647160530090332, + "step": 202 + }, + { + "epoch": 0.42920383217707303, + "grad_norm": 1.8622503280639648, + "learning_rate": 7.049464984141829e-07, + "logits/chosen": -0.5329294204711914, + "logits/rejected": -0.5523126721382141, + "logps/chosen": -0.695776104927063, + "logps/rejected": -0.8400713801383972, + "loss": 1.2285, + "rewards/accuracies": 0.6328125, + "rewards/chosen": -1.391552209854126, + "rewards/margins": 0.28859058022499084, + "rewards/rejected": -1.6801427602767944, + "step": 203 + }, + { + "epoch": 0.4313181367690783, + "grad_norm": 0.8603182435035706, + "learning_rate": 7.015617146439861e-07, + "logits/chosen": -0.4516752064228058, + "logits/rejected": -0.46907976269721985, + "logps/chosen": -0.6868133544921875, + "logps/rejected": -0.8646677732467651, + "loss": 1.2417, + "rewards/accuracies": 0.65625, + "rewards/chosen": -1.373626708984375, + "rewards/margins": 0.355709046125412, + "rewards/rejected": -1.7293355464935303, + "step": 204 + }, + { + "epoch": 0.43343244136108355, + "grad_norm": 0.6437748670578003, + "learning_rate": 6.981658652961546e-07, + "logits/chosen": -0.6159051656723022, + "logits/rejected": -0.6000130772590637, + "logps/chosen": -0.7715178728103638, + "logps/rejected": -0.8714219331741333, + "loss": 1.3469, + "rewards/accuracies": 0.5703125, + "rewards/chosen": -1.5430357456207275, + "rewards/margins": 0.19980813562870026, + "rewards/rejected": -1.7428438663482666, + "step": 205 + }, + { + "epoch": 0.43554674595308884, + "grad_norm": 1.2309322357177734, + "learning_rate": 6.947591368001137e-07, + "logits/chosen": -0.5913614630699158, + "logits/rejected": -0.6128537654876709, + "logps/chosen": -0.7512561678886414, + "logps/rejected": -0.8872793912887573, + "loss": 1.26, + "rewards/accuracies": 0.578125, + "rewards/chosen": -1.5025123357772827, + "rewards/margins": 0.2720465660095215, + "rewards/rejected": -1.7745587825775146, + "step": 206 + }, + { + "epoch": 0.43766105054509413, + "grad_norm": 0.6153685450553894, + "learning_rate": 6.913417161825449e-07, + "logits/chosen": -0.5976595878601074, + "logits/rejected": -0.6222202181816101, + "logps/chosen": -0.837669849395752, + "logps/rejected": -0.9835771918296814, + "loss": 1.2986, + "rewards/accuracies": 0.5859375, + "rewards/chosen": -1.675339698791504, + "rewards/margins": 0.2918146252632141, + "rewards/rejected": -1.9671543836593628, + "step": 207 + }, + { + "epoch": 0.4397753551370994, + "grad_norm": 1.9922760725021362, + "learning_rate": 6.87913791057119e-07, + "logits/chosen": -0.6808818578720093, + "logits/rejected": -0.6692708730697632, + "logps/chosen": -0.7088961601257324, + "logps/rejected": -0.8256410360336304, + "loss": 1.281, + "rewards/accuracies": 0.6015625, + "rewards/chosen": -1.4177923202514648, + "rewards/margins": 0.23348984122276306, + "rewards/rejected": -1.6512820720672607, + "step": 208 + }, + { + "epoch": 0.4418896597291047, + "grad_norm": 1.9562067985534668, + "learning_rate": 6.844755496141961e-07, + "logits/chosen": -0.5282632112503052, + "logits/rejected": -0.5692226886749268, + "logps/chosen": -0.7235382795333862, + "logps/rejected": -0.801092803478241, + "loss": 1.3227, + "rewards/accuracies": 0.53125, + "rewards/chosen": -1.4470765590667725, + "rewards/margins": 0.1551089584827423, + "rewards/rejected": -1.602185606956482, + "step": 209 + }, + { + "epoch": 0.44400396432111, + "grad_norm": 0.8182584047317505, + "learning_rate": 6.81027180610493e-07, + "logits/chosen": -0.6418904662132263, + "logits/rejected": -0.5941328406333923, + "logps/chosen": -0.820648729801178, + "logps/rejected": -0.8864803910255432, + "loss": 1.3498, + "rewards/accuracies": 0.625, + "rewards/chosen": -1.641297459602356, + "rewards/margins": 0.13166317343711853, + "rewards/rejected": -1.7729607820510864, + "step": 210 + }, + { + "epoch": 0.4461182689131153, + "grad_norm": 3.075260877609253, + "learning_rate": 6.775688733587227e-07, + "logits/chosen": -0.5926809906959534, + "logits/rejected": -0.5844541788101196, + "logps/chosen": -0.7822425365447998, + "logps/rejected": -0.8866626024246216, + "loss": 1.2884, + "rewards/accuracies": 0.5859375, + "rewards/chosen": -1.5644850730895996, + "rewards/margins": 0.20884013175964355, + "rewards/rejected": -1.7733252048492432, + "step": 211 + }, + { + "epoch": 0.4482325735051206, + "grad_norm": 0.8032744526863098, + "learning_rate": 6.741008177171993e-07, + "logits/chosen": -0.579971432685852, + "logits/rejected": -0.5978566408157349, + "logps/chosen": -0.721234917640686, + "logps/rejected": -0.8368514180183411, + "loss": 1.2781, + "rewards/accuracies": 0.6171875, + "rewards/chosen": -1.442469835281372, + "rewards/margins": 0.23123310506343842, + "rewards/rejected": -1.6737028360366821, + "step": 212 + }, + { + "epoch": 0.45034687809712587, + "grad_norm": 0.6680911779403687, + "learning_rate": 6.706232040794161e-07, + "logits/chosen": -0.6748596429824829, + "logits/rejected": -0.6615546941757202, + "logps/chosen": -0.7931480407714844, + "logps/rejected": -0.8879257440567017, + "loss": 1.337, + "rewards/accuracies": 0.546875, + "rewards/chosen": -1.5862960815429688, + "rewards/margins": 0.1895553171634674, + "rewards/rejected": -1.7758514881134033, + "step": 213 + }, + { + "epoch": 0.45246118268913116, + "grad_norm": 2.5107688903808594, + "learning_rate": 6.671362233635925e-07, + "logits/chosen": -0.6460363268852234, + "logits/rejected": -0.6273557543754578, + "logps/chosen": -0.823783814907074, + "logps/rejected": -0.87412428855896, + "loss": 1.3756, + "rewards/accuracies": 0.53125, + "rewards/chosen": -1.647567629814148, + "rewards/margins": 0.10068092495203018, + "rewards/rejected": -1.74824857711792, + "step": 214 + }, + { + "epoch": 0.45457548728113645, + "grad_norm": 2.2206740379333496, + "learning_rate": 6.636400670021933e-07, + "logits/chosen": -0.6295229196548462, + "logits/rejected": -0.6330893039703369, + "logps/chosen": -0.807812511920929, + "logps/rejected": -0.9784457683563232, + "loss": 1.2259, + "rewards/accuracies": 0.6484375, + "rewards/chosen": -1.615625023841858, + "rewards/margins": 0.3412665128707886, + "rewards/rejected": -1.9568915367126465, + "step": 215 + }, + { + "epoch": 0.45668979187314174, + "grad_norm": 1.2925803661346436, + "learning_rate": 6.601349269314187e-07, + "logits/chosen": -0.6001027822494507, + "logits/rejected": -0.6305864453315735, + "logps/chosen": -0.7216315865516663, + "logps/rejected": -0.8616191744804382, + "loss": 1.269, + "rewards/accuracies": 0.65625, + "rewards/chosen": -1.4432631731033325, + "rewards/margins": 0.2799749970436096, + "rewards/rejected": -1.7232383489608765, + "step": 216 + }, + { + "epoch": 0.458804096465147, + "grad_norm": 4.863992214202881, + "learning_rate": 6.566209955806679e-07, + "logits/chosen": -0.5307935476303101, + "logits/rejected": -0.5385264754295349, + "logps/chosen": -0.8053566813468933, + "logps/rejected": -0.9241464734077454, + "loss": 1.3325, + "rewards/accuracies": 0.546875, + "rewards/chosen": -1.6107133626937866, + "rewards/margins": 0.23757943511009216, + "rewards/rejected": -1.8482929468154907, + "step": 217 + }, + { + "epoch": 0.4609184010571523, + "grad_norm": 1.0189604759216309, + "learning_rate": 6.530984658619733e-07, + "logits/chosen": -0.7031885385513306, + "logits/rejected": -0.7072005867958069, + "logps/chosen": -0.8382629752159119, + "logps/rejected": -0.9468755722045898, + "loss": 1.3276, + "rewards/accuracies": 0.5546875, + "rewards/chosen": -1.6765259504318237, + "rewards/margins": 0.21722503006458282, + "rewards/rejected": -1.8937511444091797, + "step": 218 + }, + { + "epoch": 0.4630327056491576, + "grad_norm": 1.1178699731826782, + "learning_rate": 6.495675311594122e-07, + "logits/chosen": -0.5736142992973328, + "logits/rejected": -0.5926069021224976, + "logps/chosen": -0.7676032781600952, + "logps/rejected": -0.9179919958114624, + "loss": 1.278, + "rewards/accuracies": 0.6015625, + "rewards/chosen": -1.5352065563201904, + "rewards/margins": 0.3007773756980896, + "rewards/rejected": -1.8359839916229248, + "step": 219 + }, + { + "epoch": 0.4651470102411629, + "grad_norm": 2.4985287189483643, + "learning_rate": 6.460283853184879e-07, + "logits/chosen": -0.6372602581977844, + "logits/rejected": -0.6313104033470154, + "logps/chosen": -0.8754556179046631, + "logps/rejected": -0.9803894758224487, + "loss": 1.3166, + "rewards/accuracies": 0.6328125, + "rewards/chosen": -1.7509112358093262, + "rewards/margins": 0.2098677009344101, + "rewards/rejected": -1.9607789516448975, + "step": 220 + }, + { + "epoch": 0.46726131483316813, + "grad_norm": 1.5675435066223145, + "learning_rate": 6.424812226354889e-07, + "logits/chosen": -0.6377983093261719, + "logits/rejected": -0.6666730642318726, + "logps/chosen": -0.7556843757629395, + "logps/rejected": -0.9096466302871704, + "loss": 1.2397, + "rewards/accuracies": 0.6796875, + "rewards/chosen": -1.511368751525879, + "rewards/margins": 0.30792441964149475, + "rewards/rejected": -1.8192932605743408, + "step": 221 + }, + { + "epoch": 0.4693756194251734, + "grad_norm": 2.853426218032837, + "learning_rate": 6.389262378468219e-07, + "logits/chosen": -0.6055567860603333, + "logits/rejected": -0.612144947052002, + "logps/chosen": -0.8588352203369141, + "logps/rejected": -0.8928595185279846, + "loss": 1.4022, + "rewards/accuracies": 0.5078125, + "rewards/chosen": -1.7176704406738281, + "rewards/margins": 0.06804870069026947, + "rewards/rejected": -1.7857190370559692, + "step": 222 + }, + { + "epoch": 0.4714899240171787, + "grad_norm": 0.528042733669281, + "learning_rate": 6.353636261183213e-07, + "logits/chosen": -0.6543641090393066, + "logits/rejected": -0.6635830402374268, + "logps/chosen": -0.7858147621154785, + "logps/rejected": -0.9400445222854614, + "loss": 1.2446, + "rewards/accuracies": 0.640625, + "rewards/chosen": -1.571629524230957, + "rewards/margins": 0.3084595203399658, + "rewards/rejected": -1.8800890445709229, + "step": 223 + }, + { + "epoch": 0.473604228609184, + "grad_norm": 1.1155768632888794, + "learning_rate": 6.317935830345338e-07, + "logits/chosen": -0.5700349807739258, + "logits/rejected": -0.6560614705085754, + "logps/chosen": -0.8426170945167542, + "logps/rejected": -0.9983471035957336, + "loss": 1.3204, + "rewards/accuracies": 0.578125, + "rewards/chosen": -1.6852341890335083, + "rewards/margins": 0.3114599883556366, + "rewards/rejected": -1.9966942071914673, + "step": 224 + }, + { + "epoch": 0.4757185332011893, + "grad_norm": 0.802669107913971, + "learning_rate": 6.282163045879823e-07, + "logits/chosen": -0.6912901401519775, + "logits/rejected": -0.7201069593429565, + "logps/chosen": -0.8135342597961426, + "logps/rejected": -0.9537283182144165, + "loss": 1.2961, + "rewards/accuracies": 0.5859375, + "rewards/chosen": -1.6270685195922852, + "rewards/margins": 0.2803882658481598, + "rewards/rejected": -1.907456636428833, + "step": 225 + }, + { + "epoch": 0.4778328377931946, + "grad_norm": 1.709757924079895, + "learning_rate": 6.246319871684047e-07, + "logits/chosen": -0.7573816776275635, + "logits/rejected": -0.8028420209884644, + "logps/chosen": -0.891952633857727, + "logps/rejected": -1.0168029069900513, + "loss": 1.333, + "rewards/accuracies": 0.6015625, + "rewards/chosen": -1.783905267715454, + "rewards/margins": 0.24970072507858276, + "rewards/rejected": -2.0336058139801025, + "step": 226 + }, + { + "epoch": 0.47994714238519987, + "grad_norm": 2.170957326889038, + "learning_rate": 6.210408275519734e-07, + "logits/chosen": -0.6915597915649414, + "logits/rejected": -0.7027997970581055, + "logps/chosen": -0.9063036441802979, + "logps/rejected": -1.0104373693466187, + "loss": 1.3388, + "rewards/accuracies": 0.5390625, + "rewards/chosen": -1.8126072883605957, + "rewards/margins": 0.20826762914657593, + "rewards/rejected": -2.0208747386932373, + "step": 227 + }, + { + "epoch": 0.48206144697720515, + "grad_norm": 1.8802261352539062, + "learning_rate": 6.174430228904919e-07, + "logits/chosen": -0.689726710319519, + "logits/rejected": -0.7143282890319824, + "logps/chosen": -0.7480812072753906, + "logps/rejected": -0.8698041439056396, + "loss": 1.2836, + "rewards/accuracies": 0.6171875, + "rewards/chosen": -1.4961624145507812, + "rewards/margins": 0.24344584345817566, + "rewards/rejected": -1.7396082878112793, + "step": 228 + }, + { + "epoch": 0.48417575156921044, + "grad_norm": 2.5202934741973877, + "learning_rate": 6.13838770700571e-07, + "logits/chosen": -0.6858299374580383, + "logits/rejected": -0.7115206122398376, + "logps/chosen": -0.8575515151023865, + "logps/rejected": -0.9657347202301025, + "loss": 1.3046, + "rewards/accuracies": 0.546875, + "rewards/chosen": -1.715103030204773, + "rewards/margins": 0.21636635065078735, + "rewards/rejected": -1.931469440460205, + "step": 229 + }, + { + "epoch": 0.48629005616121573, + "grad_norm": 1.268512487411499, + "learning_rate": 6.102282688527859e-07, + "logits/chosen": -0.7078689932823181, + "logits/rejected": -0.7254161238670349, + "logps/chosen": -0.8850880861282349, + "logps/rejected": -1.031385898590088, + "loss": 1.2816, + "rewards/accuracies": 0.625, + "rewards/chosen": -1.7701761722564697, + "rewards/margins": 0.29259535670280457, + "rewards/rejected": -2.062771797180176, + "step": 230 + }, + { + "epoch": 0.488404360753221, + "grad_norm": 1.7285584211349487, + "learning_rate": 6.066117155608135e-07, + "logits/chosen": -0.7325868606567383, + "logits/rejected": -0.7433226108551025, + "logps/chosen": -0.8014956116676331, + "logps/rejected": -0.9653260111808777, + "loss": 1.2429, + "rewards/accuracies": 0.6328125, + "rewards/chosen": -1.6029912233352661, + "rewards/margins": 0.32766085863113403, + "rewards/rejected": -1.9306520223617554, + "step": 231 + }, + { + "epoch": 0.4905186653452263, + "grad_norm": 0.6270304322242737, + "learning_rate": 6.029893093705491e-07, + "logits/chosen": -0.692166805267334, + "logits/rejected": -0.6799293756484985, + "logps/chosen": -0.7850213646888733, + "logps/rejected": -0.8839574456214905, + "loss": 1.2967, + "rewards/accuracies": 0.640625, + "rewards/chosen": -1.5700427293777466, + "rewards/margins": 0.19787229597568512, + "rewards/rejected": -1.767914891242981, + "step": 232 + }, + { + "epoch": 0.4926329699372316, + "grad_norm": 1.0160484313964844, + "learning_rate": 5.993612491492087e-07, + "logits/chosen": -0.7095844149589539, + "logits/rejected": -0.71524578332901, + "logps/chosen": -0.7063854336738586, + "logps/rejected": -0.8855549097061157, + "loss": 1.2176, + "rewards/accuracies": 0.6328125, + "rewards/chosen": -1.4127708673477173, + "rewards/margins": 0.3583390712738037, + "rewards/rejected": -1.7711098194122314, + "step": 233 + }, + { + "epoch": 0.4947472745292369, + "grad_norm": 2.225841999053955, + "learning_rate": 5.957277340744094e-07, + "logits/chosen": -0.7488946318626404, + "logits/rejected": -0.7588428854942322, + "logps/chosen": -0.9203822612762451, + "logps/rejected": -1.0089298486709595, + "loss": 1.355, + "rewards/accuracies": 0.5390625, + "rewards/chosen": -1.8407645225524902, + "rewards/margins": 0.17709502577781677, + "rewards/rejected": -2.017859697341919, + "step": 234 + }, + { + "epoch": 0.4968615791212422, + "grad_norm": 1.9577795267105103, + "learning_rate": 5.920889636232351e-07, + "logits/chosen": -0.8078997731208801, + "logits/rejected": -0.8064825534820557, + "logps/chosen": -0.8004480004310608, + "logps/rejected": -0.9856831431388855, + "loss": 1.2273, + "rewards/accuracies": 0.6640625, + "rewards/chosen": -1.6008960008621216, + "rewards/margins": 0.3704703152179718, + "rewards/rejected": -1.971366286277771, + "step": 235 + }, + { + "epoch": 0.4989758837132474, + "grad_norm": 2.5050246715545654, + "learning_rate": 5.884451375612865e-07, + "logits/chosen": -0.7499472498893738, + "logits/rejected": -0.7421904802322388, + "logps/chosen": -0.8363584876060486, + "logps/rejected": -0.9543781876564026, + "loss": 1.3002, + "rewards/accuracies": 0.5390625, + "rewards/chosen": -1.6727169752120972, + "rewards/margins": 0.23603934049606323, + "rewards/rejected": -1.9087563753128052, + "step": 236 + }, + { + "epoch": 0.5010901883052528, + "grad_norm": 0.585436224937439, + "learning_rate": 5.847964559317128e-07, + "logits/chosen": -0.730015218257904, + "logits/rejected": -0.7154791355133057, + "logps/chosen": -0.8828849196434021, + "logps/rejected": -0.9897070527076721, + "loss": 1.347, + "rewards/accuracies": 0.578125, + "rewards/chosen": -1.7657698392868042, + "rewards/margins": 0.21364440023899078, + "rewards/rejected": -1.9794141054153442, + "step": 237 + }, + { + "epoch": 0.503204492897258, + "grad_norm": 0.9204092621803284, + "learning_rate": 5.8114311904423e-07, + "logits/chosen": -0.759974479675293, + "logits/rejected": -0.7793674468994141, + "logps/chosen": -0.8321584463119507, + "logps/rejected": -1.0809751749038696, + "loss": 1.2185, + "rewards/accuracies": 0.6875, + "rewards/chosen": -1.6643168926239014, + "rewards/margins": 0.4976334273815155, + "rewards/rejected": -2.1619503498077393, + "step": 238 + }, + { + "epoch": 0.5053187974892633, + "grad_norm": 5.147011756896973, + "learning_rate": 5.774853274641243e-07, + "logits/chosen": -0.7148956060409546, + "logits/rejected": -0.7363921403884888, + "logps/chosen": -0.8623124361038208, + "logps/rejected": -1.0681498050689697, + "loss": 1.2353, + "rewards/accuracies": 0.6484375, + "rewards/chosen": -1.7246248722076416, + "rewards/margins": 0.4116746187210083, + "rewards/rejected": -2.1362996101379395, + "step": 239 + }, + { + "epoch": 0.5074331020812686, + "grad_norm": 1.9065529108047485, + "learning_rate": 5.738232820012407e-07, + "logits/chosen": -0.7158540487289429, + "logits/rejected": -0.7083900570869446, + "logps/chosen": -0.981558620929718, + "logps/rejected": -1.054612636566162, + "loss": 1.3594, + "rewards/accuracies": 0.59375, + "rewards/chosen": -1.963117241859436, + "rewards/margins": 0.14610806107521057, + "rewards/rejected": -2.109225273132324, + "step": 240 + }, + { + "epoch": 0.5095474066732739, + "grad_norm": 2.4411256313323975, + "learning_rate": 5.701571836989591e-07, + "logits/chosen": -0.8441444039344788, + "logits/rejected": -0.8529233336448669, + "logps/chosen": -0.8665949702262878, + "logps/rejected": -1.030572772026062, + "loss": 1.2477, + "rewards/accuracies": 0.671875, + "rewards/chosen": -1.7331899404525757, + "rewards/margins": 0.3279556334018707, + "rewards/rejected": -2.061145544052124, + "step": 241 + }, + { + "epoch": 0.5116617112652792, + "grad_norm": 2.461113214492798, + "learning_rate": 5.664872338231571e-07, + "logits/chosen": -0.7463312149047852, + "logits/rejected": -0.7725105285644531, + "logps/chosen": -0.9185941815376282, + "logps/rejected": -1.1244423389434814, + "loss": 1.2404, + "rewards/accuracies": 0.6328125, + "rewards/chosen": -1.8371883630752563, + "rewards/margins": 0.411696195602417, + "rewards/rejected": -2.248884677886963, + "step": 242 + }, + { + "epoch": 0.5137760158572844, + "grad_norm": 3.5861761569976807, + "learning_rate": 5.628136338511607e-07, + "logits/chosen": -0.8432914018630981, + "logits/rejected": -0.85801100730896, + "logps/chosen": -0.8873915672302246, + "logps/rejected": -1.0090795755386353, + "loss": 1.3072, + "rewards/accuracies": 0.609375, + "rewards/chosen": -1.7747831344604492, + "rewards/margins": 0.24337637424468994, + "rewards/rejected": -2.0181591510772705, + "step": 243 + }, + { + "epoch": 0.5158903204492897, + "grad_norm": 2.109071969985962, + "learning_rate": 5.591365854606829e-07, + "logits/chosen": -0.7899532318115234, + "logits/rejected": -0.7548331618309021, + "logps/chosen": -0.9333330392837524, + "logps/rejected": -1.00949227809906, + "loss": 1.3749, + "rewards/accuracies": 0.59375, + "rewards/chosen": -1.8666660785675049, + "rewards/margins": 0.1523183286190033, + "rewards/rejected": -2.01898455619812, + "step": 244 + }, + { + "epoch": 0.518004625041295, + "grad_norm": 2.2017955780029297, + "learning_rate": 5.554562905187527e-07, + "logits/chosen": -0.7569047212600708, + "logits/rejected": -0.7679808735847473, + "logps/chosen": -0.9779613614082336, + "logps/rejected": -1.1713427305221558, + "loss": 1.2628, + "rewards/accuracies": 0.609375, + "rewards/chosen": -1.9559227228164673, + "rewards/margins": 0.3867628276348114, + "rewards/rejected": -2.3426854610443115, + "step": 245 + }, + { + "epoch": 0.5201189296333003, + "grad_norm": 4.651991367340088, + "learning_rate": 5.517729510706315e-07, + "logits/chosen": -0.8546395301818848, + "logits/rejected": -0.8609369397163391, + "logps/chosen": -0.9926605224609375, + "logps/rejected": -1.1553713083267212, + "loss": 1.2812, + "rewards/accuracies": 0.6171875, + "rewards/chosen": -1.985321044921875, + "rewards/margins": 0.32542160153388977, + "rewards/rejected": -2.3107426166534424, + "step": 246 + }, + { + "epoch": 0.5222332342253055, + "grad_norm": 2.6384060382843018, + "learning_rate": 5.480867693287223e-07, + "logits/chosen": -0.7734386324882507, + "logits/rejected": -0.7963250875473022, + "logps/chosen": -0.8996341824531555, + "logps/rejected": -1.0466523170471191, + "loss": 1.2849, + "rewards/accuracies": 0.578125, + "rewards/chosen": -1.799268364906311, + "rewards/margins": 0.2940361201763153, + "rewards/rejected": -2.0933046340942383, + "step": 247 + }, + { + "epoch": 0.5243475388173109, + "grad_norm": 1.3608977794647217, + "learning_rate": 5.443979476614674e-07, + "logits/chosen": -0.7350472807884216, + "logits/rejected": -0.7215992212295532, + "logps/chosen": -0.8887076377868652, + "logps/rejected": -1.0147045850753784, + "loss": 1.3182, + "rewards/accuracies": 0.6171875, + "rewards/chosen": -1.7774152755737305, + "rewards/margins": 0.25199398398399353, + "rewards/rejected": -2.029409170150757, + "step": 248 + }, + { + "epoch": 0.5264618434093161, + "grad_norm": 3.017115354537964, + "learning_rate": 5.407066885822391e-07, + "logits/chosen": -0.827782154083252, + "logits/rejected": -0.8471929430961609, + "logps/chosen": -0.9262440800666809, + "logps/rejected": -1.1658306121826172, + "loss": 1.1882, + "rewards/accuracies": 0.6640625, + "rewards/chosen": -1.8524881601333618, + "rewards/margins": 0.47917306423187256, + "rewards/rejected": -2.3316612243652344, + "step": 249 + }, + { + "epoch": 0.5285761480013215, + "grad_norm": 0.7805312275886536, + "learning_rate": 5.370131947382214e-07, + "logits/chosen": -0.7815499305725098, + "logits/rejected": -0.8279274702072144, + "logps/chosen": -0.968708872795105, + "logps/rejected": -1.2697322368621826, + "loss": 1.2092, + "rewards/accuracies": 0.671875, + "rewards/chosen": -1.93741774559021, + "rewards/margins": 0.6020466685295105, + "rewards/rejected": -2.5394644737243652, + "step": 250 + }, + { + "epoch": 0.5306904525933267, + "grad_norm": 2.229363441467285, + "learning_rate": 5.333176688992855e-07, + "logits/chosen": -0.7824153900146484, + "logits/rejected": -0.8154900074005127, + "logps/chosen": -1.0211957693099976, + "logps/rejected": -1.2145965099334717, + "loss": 1.3074, + "rewards/accuracies": 0.609375, + "rewards/chosen": -2.042391538619995, + "rewards/margins": 0.3868010938167572, + "rewards/rejected": -2.4291930198669434, + "step": 251 + }, + { + "epoch": 0.532804757185332, + "grad_norm": 1.1359837055206299, + "learning_rate": 5.296203139468571e-07, + "logits/chosen": -0.7467613220214844, + "logits/rejected": -0.7548531889915466, + "logps/chosen": -1.0614902973175049, + "logps/rejected": -1.2674376964569092, + "loss": 1.2512, + "rewards/accuracies": 0.578125, + "rewards/chosen": -2.1229805946350098, + "rewards/margins": 0.4118950664997101, + "rewards/rejected": -2.5348753929138184, + "step": 252 + }, + { + "epoch": 0.5349190617773373, + "grad_norm": 3.0548548698425293, + "learning_rate": 5.259213328627792e-07, + "logits/chosen": -0.7868636250495911, + "logits/rejected": -0.8130850791931152, + "logps/chosen": -1.0743666887283325, + "logps/rejected": -1.2010191679000854, + "loss": 1.3275, + "rewards/accuracies": 0.578125, + "rewards/chosen": -2.148733377456665, + "rewards/margins": 0.2533051669597626, + "rewards/rejected": -2.402038335800171, + "step": 253 + }, + { + "epoch": 0.5370333663693426, + "grad_norm": 1.7205246686935425, + "learning_rate": 5.222209287181676e-07, + "logits/chosen": -0.81404709815979, + "logits/rejected": -0.8481613397598267, + "logps/chosen": -1.1599587202072144, + "logps/rejected": -1.4234716892242432, + "loss": 1.2894, + "rewards/accuracies": 0.6015625, + "rewards/chosen": -2.3199174404144287, + "rewards/margins": 0.5270256400108337, + "rewards/rejected": -2.8469433784484863, + "step": 254 + }, + { + "epoch": 0.5391476709613479, + "grad_norm": 2.2516112327575684, + "learning_rate": 5.185193046622634e-07, + "logits/chosen": -0.8112510442733765, + "logits/rejected": -0.8310728073120117, + "logps/chosen": -1.1263186931610107, + "logps/rejected": -1.3256827592849731, + "loss": 1.3552, + "rewards/accuracies": 0.5390625, + "rewards/chosen": -2.2526373863220215, + "rewards/margins": 0.39872825145721436, + "rewards/rejected": -2.6513655185699463, + "step": 255 + }, + { + "epoch": 0.5412619755533532, + "grad_norm": 2.8379359245300293, + "learning_rate": 5.148166639112799e-07, + "logits/chosen": -0.8202102184295654, + "logits/rejected": -0.845209002494812, + "logps/chosen": -1.264180302619934, + "logps/rejected": -1.6190590858459473, + "loss": 1.2083, + "rewards/accuracies": 0.625, + "rewards/chosen": -2.528360605239868, + "rewards/margins": 0.7097575068473816, + "rewards/rejected": -3.2381181716918945, + "step": 256 + }, + { + "epoch": 0.5433762801453584, + "grad_norm": 4.676355838775635, + "learning_rate": 5.111132097372459e-07, + "logits/chosen": -0.8866451978683472, + "logits/rejected": -0.8642281889915466, + "logps/chosen": -1.3194389343261719, + "logps/rejected": -1.4506916999816895, + "loss": 1.4002, + "rewards/accuracies": 0.59375, + "rewards/chosen": -2.6388778686523438, + "rewards/margins": 0.2625058591365814, + "rewards/rejected": -2.901383399963379, + "step": 257 + }, + { + "epoch": 0.5454905847373638, + "grad_norm": 2.55251407623291, + "learning_rate": 5.074091454568463e-07, + "logits/chosen": -0.7903708815574646, + "logits/rejected": -0.8010709881782532, + "logps/chosen": -1.3550961017608643, + "logps/rejected": -1.661428451538086, + "loss": 1.2131, + "rewards/accuracies": 0.65625, + "rewards/chosen": -2.7101922035217285, + "rewards/margins": 0.6126645803451538, + "rewards/rejected": -3.322856903076172, + "step": 258 + }, + { + "epoch": 0.547604889329369, + "grad_norm": 4.116572856903076, + "learning_rate": 5.037046744202611e-07, + "logits/chosen": -0.7501232624053955, + "logits/rejected": -0.7825176119804382, + "logps/chosen": -1.2111856937408447, + "logps/rejected": -1.5176191329956055, + "loss": 1.1345, + "rewards/accuracies": 0.71875, + "rewards/chosen": -2.4223713874816895, + "rewards/margins": 0.6128667593002319, + "rewards/rejected": -3.035238265991211, + "step": 259 + }, + { + "epoch": 0.5497191939213742, + "grad_norm": 2.0285205841064453, + "learning_rate": 5e-07, + "logits/chosen": -0.8355445861816406, + "logits/rejected": -0.8497716784477234, + "logps/chosen": -1.1876304149627686, + "logps/rejected": -1.4788450002670288, + "loss": 1.1559, + "rewards/accuracies": 0.671875, + "rewards/chosen": -2.375260829925537, + "rewards/margins": 0.5824294090270996, + "rewards/rejected": -2.9576900005340576, + "step": 260 + }, + { + "epoch": 0.5518334985133796, + "grad_norm": 4.681185245513916, + "learning_rate": 4.962953255797389e-07, + "logits/chosen": -0.8240503072738647, + "logits/rejected": -0.8016488552093506, + "logps/chosen": -1.2238959074020386, + "logps/rejected": -1.4727882146835327, + "loss": 1.2914, + "rewards/accuracies": 0.5546875, + "rewards/chosen": -2.447791814804077, + "rewards/margins": 0.49778467416763306, + "rewards/rejected": -2.9455764293670654, + "step": 261 + }, + { + "epoch": 0.5539478031053848, + "grad_norm": 5.15679931640625, + "learning_rate": 4.925908545431537e-07, + "logits/chosen": -0.728940486907959, + "logits/rejected": -0.7355924248695374, + "logps/chosen": -1.3356778621673584, + "logps/rejected": -1.6726096868515015, + "loss": 1.1434, + "rewards/accuracies": 0.6953125, + "rewards/chosen": -2.671355724334717, + "rewards/margins": 0.6738637685775757, + "rewards/rejected": -3.345219373703003, + "step": 262 + }, + { + "epoch": 0.5560621076973902, + "grad_norm": 2.481048345565796, + "learning_rate": 4.888867902627543e-07, + "logits/chosen": -0.8311591148376465, + "logits/rejected": -0.8191719055175781, + "logps/chosen": -1.2743335962295532, + "logps/rejected": -1.5339927673339844, + "loss": 1.2164, + "rewards/accuracies": 0.6640625, + "rewards/chosen": -2.5486671924591064, + "rewards/margins": 0.5193185210227966, + "rewards/rejected": -3.0679855346679688, + "step": 263 + }, + { + "epoch": 0.5581764122893954, + "grad_norm": 3.6758291721343994, + "learning_rate": 4.851833360887201e-07, + "logits/chosen": -0.6787989735603333, + "logits/rejected": -0.668928325176239, + "logps/chosen": -1.2278664112091064, + "logps/rejected": -1.4955706596374512, + "loss": 1.1942, + "rewards/accuracies": 0.7109375, + "rewards/chosen": -2.455732822418213, + "rewards/margins": 0.535408616065979, + "rewards/rejected": -2.9911413192749023, + "step": 264 + }, + { + "epoch": 0.5602907168814008, + "grad_norm": 2.7282023429870605, + "learning_rate": 4.814806953377365e-07, + "logits/chosen": -0.7772133350372314, + "logits/rejected": -0.7689889073371887, + "logps/chosen": -1.1954048871994019, + "logps/rejected": -1.444943904876709, + "loss": 1.2686, + "rewards/accuracies": 0.59375, + "rewards/chosen": -2.3908097743988037, + "rewards/margins": 0.4990782141685486, + "rewards/rejected": -2.889887809753418, + "step": 265 + }, + { + "epoch": 0.562405021473406, + "grad_norm": 2.8753116130828857, + "learning_rate": 4.777790712818323e-07, + "logits/chosen": -0.6946043968200684, + "logits/rejected": -0.7001516819000244, + "logps/chosen": -1.2844620943069458, + "logps/rejected": -1.486103892326355, + "loss": 1.284, + "rewards/accuracies": 0.5703125, + "rewards/chosen": -2.5689241886138916, + "rewards/margins": 0.4032836854457855, + "rewards/rejected": -2.97220778465271, + "step": 266 + }, + { + "epoch": 0.5645193260654113, + "grad_norm": 1.5583593845367432, + "learning_rate": 4.740786671372209e-07, + "logits/chosen": -0.7396820187568665, + "logits/rejected": -0.7129873037338257, + "logps/chosen": -1.410097599029541, + "logps/rejected": -1.6091456413269043, + "loss": 1.3158, + "rewards/accuracies": 0.640625, + "rewards/chosen": -2.820195198059082, + "rewards/margins": 0.3980959951877594, + "rewards/rejected": -3.2182912826538086, + "step": 267 + }, + { + "epoch": 0.5666336306574166, + "grad_norm": 3.5984952449798584, + "learning_rate": 4.703796860531429e-07, + "logits/chosen": -0.7031830549240112, + "logits/rejected": -0.700330376625061, + "logps/chosen": -1.633664608001709, + "logps/rejected": -1.9186874628067017, + "loss": 1.2479, + "rewards/accuracies": 0.6796875, + "rewards/chosen": -3.267329216003418, + "rewards/margins": 0.5700456500053406, + "rewards/rejected": -3.8373749256134033, + "step": 268 + }, + { + "epoch": 0.5687479352494219, + "grad_norm": 6.295733451843262, + "learning_rate": 4.666823311007144e-07, + "logits/chosen": -0.8001950979232788, + "logits/rejected": -0.8042099475860596, + "logps/chosen": -1.5675832033157349, + "logps/rejected": -1.9247075319290161, + "loss": 1.1759, + "rewards/accuracies": 0.625, + "rewards/chosen": -3.1351664066314697, + "rewards/margins": 0.7142485976219177, + "rewards/rejected": -3.8494150638580322, + "step": 269 + }, + { + "epoch": 0.5708622398414271, + "grad_norm": 3.6349036693573, + "learning_rate": 4.6298680526177855e-07, + "logits/chosen": -0.8108068704605103, + "logits/rejected": -0.8030902147293091, + "logps/chosen": -1.8205997943878174, + "logps/rejected": -2.195197105407715, + "loss": 1.1864, + "rewards/accuracies": 0.6796875, + "rewards/chosen": -3.6411995887756348, + "rewards/margins": 0.7491948008537292, + "rewards/rejected": -4.39039421081543, + "step": 270 + }, + { + "epoch": 0.5729765444334325, + "grad_norm": 4.786395072937012, + "learning_rate": 4.59293311417761e-07, + "logits/chosen": -0.798182487487793, + "logits/rejected": -0.7736828327178955, + "logps/chosen": -1.8617057800292969, + "logps/rejected": -2.08984637260437, + "loss": 1.3947, + "rewards/accuracies": 0.6328125, + "rewards/chosen": -3.7234115600585938, + "rewards/margins": 0.4562810957431793, + "rewards/rejected": -4.17969274520874, + "step": 271 + }, + { + "epoch": 0.5750908490254377, + "grad_norm": 6.7946457862854, + "learning_rate": 4.556020523385326e-07, + "logits/chosen": -0.7530428171157837, + "logits/rejected": -0.7395590543746948, + "logps/chosen": -1.8709862232208252, + "logps/rejected": -2.3599390983581543, + "loss": 1.1025, + "rewards/accuracies": 0.6875, + "rewards/chosen": -3.7419724464416504, + "rewards/margins": 0.9779053926467896, + "rewards/rejected": -4.719878196716309, + "step": 272 + }, + { + "epoch": 0.5772051536174431, + "grad_norm": 4.877624988555908, + "learning_rate": 4.5191323067127773e-07, + "logits/chosen": -0.7732480764389038, + "logits/rejected": -0.7835702300071716, + "logps/chosen": -2.0340800285339355, + "logps/rejected": -2.330742835998535, + "loss": 1.3198, + "rewards/accuracies": 0.6328125, + "rewards/chosen": -4.068160057067871, + "rewards/margins": 0.5933258533477783, + "rewards/rejected": -4.66148567199707, + "step": 273 + }, + { + "epoch": 0.5793194582094483, + "grad_norm": 9.001680374145508, + "learning_rate": 4.482270489293685e-07, + "logits/chosen": -0.9062263369560242, + "logits/rejected": -0.9105854630470276, + "logps/chosen": -2.1364972591400146, + "logps/rejected": -2.4467523097991943, + "loss": 1.3464, + "rewards/accuracies": 0.6328125, + "rewards/chosen": -4.272994518280029, + "rewards/margins": 0.6205099821090698, + "rewards/rejected": -4.893504619598389, + "step": 274 + }, + { + "epoch": 0.5814337628014535, + "grad_norm": 2.811025619506836, + "learning_rate": 4.445437094812475e-07, + "logits/chosen": -0.8593579530715942, + "logits/rejected": -0.8343831896781921, + "logps/chosen": -2.452843189239502, + "logps/rejected": -2.7551848888397217, + "loss": 1.3536, + "rewards/accuracies": 0.5859375, + "rewards/chosen": -4.905686378479004, + "rewards/margins": 0.6046838760375977, + "rewards/rejected": -5.510369777679443, + "step": 275 + }, + { + "epoch": 0.5835480673934589, + "grad_norm": 2.2030158042907715, + "learning_rate": 4.4086341453931714e-07, + "logits/chosen": -0.8991417288780212, + "logits/rejected": -0.8766486644744873, + "logps/chosen": -2.30641508102417, + "logps/rejected": -2.7606654167175293, + "loss": 1.1708, + "rewards/accuracies": 0.6875, + "rewards/chosen": -4.61283016204834, + "rewards/margins": 0.9085015654563904, + "rewards/rejected": -5.521330833435059, + "step": 276 + }, + { + "epoch": 0.5856623719854641, + "grad_norm": 5.5185227394104, + "learning_rate": 4.371863661488393e-07, + "logits/chosen": -0.8738227486610413, + "logits/rejected": -0.8665530681610107, + "logps/chosen": -2.29125714302063, + "logps/rejected": -2.7014153003692627, + "loss": 1.1883, + "rewards/accuracies": 0.6484375, + "rewards/chosen": -4.58251428604126, + "rewards/margins": 0.8203167915344238, + "rewards/rejected": -5.402830600738525, + "step": 277 + }, + { + "epoch": 0.5877766765774695, + "grad_norm": 2.0779521465301514, + "learning_rate": 4.3351276617684285e-07, + "logits/chosen": -0.958415150642395, + "logits/rejected": -0.9585077166557312, + "logps/chosen": -2.4368410110473633, + "logps/rejected": -2.798506736755371, + "loss": 1.1749, + "rewards/accuracies": 0.6484375, + "rewards/chosen": -4.873682022094727, + "rewards/margins": 0.7233313322067261, + "rewards/rejected": -5.597013473510742, + "step": 278 + }, + { + "epoch": 0.5898909811694747, + "grad_norm": 2.884877920150757, + "learning_rate": 4.29842816301041e-07, + "logits/chosen": -0.9413051605224609, + "logits/rejected": -0.9224691987037659, + "logps/chosen": -2.485034942626953, + "logps/rejected": -2.911332368850708, + "loss": 1.2035, + "rewards/accuracies": 0.6171875, + "rewards/chosen": -4.970069885253906, + "rewards/margins": 0.8525944948196411, + "rewards/rejected": -5.822664737701416, + "step": 279 + }, + { + "epoch": 0.59200528576148, + "grad_norm": 5.203248500823975, + "learning_rate": 4.2617671799875944e-07, + "logits/chosen": -0.9359334111213684, + "logits/rejected": -0.9387660026550293, + "logps/chosen": -2.378349542617798, + "logps/rejected": -2.730886936187744, + "loss": 1.2253, + "rewards/accuracies": 0.7109375, + "rewards/chosen": -4.756699085235596, + "rewards/margins": 0.7050745487213135, + "rewards/rejected": -5.461773872375488, + "step": 280 + }, + { + "epoch": 0.5941195903534853, + "grad_norm": 6.818525314331055, + "learning_rate": 4.225146725358758e-07, + "logits/chosen": -0.8864554166793823, + "logits/rejected": -0.8813320398330688, + "logps/chosen": -2.4233975410461426, + "logps/rejected": -2.8188178539276123, + "loss": 1.2281, + "rewards/accuracies": 0.6640625, + "rewards/chosen": -4.846795082092285, + "rewards/margins": 0.7908411622047424, + "rewards/rejected": -5.637635707855225, + "step": 281 + }, + { + "epoch": 0.5962338949454906, + "grad_norm": 2.529154062271118, + "learning_rate": 4.1885688095577e-07, + "logits/chosen": -0.8420325517654419, + "logits/rejected": -0.8822402954101562, + "logps/chosen": -2.626488447189331, + "logps/rejected": -3.1887192726135254, + "loss": 1.0561, + "rewards/accuracies": 0.6953125, + "rewards/chosen": -5.252976894378662, + "rewards/margins": 1.1244611740112305, + "rewards/rejected": -6.377438545227051, + "step": 282 + }, + { + "epoch": 0.5983481995374959, + "grad_norm": 3.0739686489105225, + "learning_rate": 4.152035440682873e-07, + "logits/chosen": -0.8550993204116821, + "logits/rejected": -0.8528580665588379, + "logps/chosen": -2.6387887001037598, + "logps/rejected": -2.9952192306518555, + "loss": 1.3409, + "rewards/accuracies": 0.6015625, + "rewards/chosen": -5.2775774002075195, + "rewards/margins": 0.7128612399101257, + "rewards/rejected": -5.990438461303711, + "step": 283 + }, + { + "epoch": 0.6004625041295012, + "grad_norm": 3.6649062633514404, + "learning_rate": 4.1155486243871363e-07, + "logits/chosen": -0.8643282651901245, + "logits/rejected": -0.9175342321395874, + "logps/chosen": -2.929072618484497, + "logps/rejected": -3.105940580368042, + "loss": 1.5121, + "rewards/accuracies": 0.578125, + "rewards/chosen": -5.858145236968994, + "rewards/margins": 0.3537355065345764, + "rewards/rejected": -6.211881160736084, + "step": 284 + }, + { + "epoch": 0.6025768087215064, + "grad_norm": 2.5071723461151123, + "learning_rate": 4.0791103637676486e-07, + "logits/chosen": -0.8368631601333618, + "logits/rejected": -0.819808304309845, + "logps/chosen": -3.0672600269317627, + "logps/rejected": -3.4685003757476807, + "loss": 1.3236, + "rewards/accuracies": 0.65625, + "rewards/chosen": -6.134520053863525, + "rewards/margins": 0.8024805784225464, + "rewards/rejected": -6.937000751495361, + "step": 285 + }, + { + "epoch": 0.6046911133135118, + "grad_norm": 8.780280113220215, + "learning_rate": 4.042722659255906e-07, + "logits/chosen": -0.8249569535255432, + "logits/rejected": -0.8442113995552063, + "logps/chosen": -3.3199872970581055, + "logps/rejected": -3.7276291847229004, + "loss": 1.322, + "rewards/accuracies": 0.6796875, + "rewards/chosen": -6.639974594116211, + "rewards/margins": 0.8152831792831421, + "rewards/rejected": -7.455258369445801, + "step": 286 + }, + { + "epoch": 0.606805417905517, + "grad_norm": 3.4388678073883057, + "learning_rate": 4.006387508507914e-07, + "logits/chosen": -0.7224047780036926, + "logits/rejected": -0.7616450786590576, + "logps/chosen": -2.9411330223083496, + "logps/rejected": -3.32680082321167, + "loss": 1.2868, + "rewards/accuracies": 0.6640625, + "rewards/chosen": -5.882266044616699, + "rewards/margins": 0.7713361978530884, + "rewards/rejected": -6.65360164642334, + "step": 287 + }, + { + "epoch": 0.6089197224975224, + "grad_norm": 5.095273971557617, + "learning_rate": 3.970106906294509e-07, + "logits/chosen": -0.7394692897796631, + "logits/rejected": -0.7316830158233643, + "logps/chosen": -2.9902045726776123, + "logps/rejected": -3.469916820526123, + "loss": 1.1694, + "rewards/accuracies": 0.6953125, + "rewards/chosen": -5.980409145355225, + "rewards/margins": 0.9594244360923767, + "rewards/rejected": -6.939833641052246, + "step": 288 + }, + { + "epoch": 0.6110340270895276, + "grad_norm": 2.1398613452911377, + "learning_rate": 3.933882844391866e-07, + "logits/chosen": -0.8181312084197998, + "logits/rejected": -0.833306610584259, + "logps/chosen": -3.0137529373168945, + "logps/rejected": -3.4241840839385986, + "loss": 1.2453, + "rewards/accuracies": 0.609375, + "rewards/chosen": -6.027505874633789, + "rewards/margins": 0.8208625316619873, + "rewards/rejected": -6.848368167877197, + "step": 289 + }, + { + "epoch": 0.6131483316815328, + "grad_norm": 4.185284614562988, + "learning_rate": 3.89771731147214e-07, + "logits/chosen": -0.7805104851722717, + "logits/rejected": -0.8086984753608704, + "logps/chosen": -2.984957218170166, + "logps/rejected": -3.430112361907959, + "loss": 1.2671, + "rewards/accuracies": 0.5859375, + "rewards/chosen": -5.969914436340332, + "rewards/margins": 0.890310525894165, + "rewards/rejected": -6.860224723815918, + "step": 290 + }, + { + "epoch": 0.6152626362735382, + "grad_norm": 7.104829788208008, + "learning_rate": 3.861612292994292e-07, + "logits/chosen": -0.7788286209106445, + "logits/rejected": -0.8027424216270447, + "logps/chosen": -2.896563768386841, + "logps/rejected": -3.1082046031951904, + "loss": 1.4853, + "rewards/accuracies": 0.578125, + "rewards/chosen": -5.793127536773682, + "rewards/margins": 0.42328107357025146, + "rewards/rejected": -6.216409206390381, + "step": 291 + }, + { + "epoch": 0.6173769408655434, + "grad_norm": 3.795579433441162, + "learning_rate": 3.825569771095082e-07, + "logits/chosen": -0.8044757843017578, + "logits/rejected": -0.7828265428543091, + "logps/chosen": -2.8059256076812744, + "logps/rejected": -3.3121094703674316, + "loss": 1.1299, + "rewards/accuracies": 0.6875, + "rewards/chosen": -5.611851215362549, + "rewards/margins": 1.0123679637908936, + "rewards/rejected": -6.624218940734863, + "step": 292 + }, + { + "epoch": 0.6194912454575487, + "grad_norm": 4.486142158508301, + "learning_rate": 3.7895917244802655e-07, + "logits/chosen": -0.7511788606643677, + "logits/rejected": -0.7885503768920898, + "logps/chosen": -2.927251100540161, + "logps/rejected": -3.1605303287506104, + "loss": 1.426, + "rewards/accuracies": 0.6015625, + "rewards/chosen": -5.854502201080322, + "rewards/margins": 0.466558575630188, + "rewards/rejected": -6.321060657501221, + "step": 293 + }, + { + "epoch": 0.621605550049554, + "grad_norm": 3.3942787647247314, + "learning_rate": 3.753680128315952e-07, + "logits/chosen": -0.8230300545692444, + "logits/rejected": -0.8042524456977844, + "logps/chosen": -2.524353504180908, + "logps/rejected": -2.8687357902526855, + "loss": 1.2653, + "rewards/accuracies": 0.6640625, + "rewards/chosen": -5.048707008361816, + "rewards/margins": 0.6887640953063965, + "rewards/rejected": -5.737471580505371, + "step": 294 + }, + { + "epoch": 0.6237198546415593, + "grad_norm": 4.326812744140625, + "learning_rate": 3.717836954120178e-07, + "logits/chosen": -0.7763381004333496, + "logits/rejected": -0.7852378487586975, + "logps/chosen": -2.4861948490142822, + "logps/rejected": -2.8822267055511475, + "loss": 1.124, + "rewards/accuracies": 0.75, + "rewards/chosen": -4.9723896980285645, + "rewards/margins": 0.7920635938644409, + "rewards/rejected": -5.764453411102295, + "step": 295 + }, + { + "epoch": 0.6258341592335646, + "grad_norm": 3.886293649673462, + "learning_rate": 3.6820641696546627e-07, + "logits/chosen": -0.8350138664245605, + "logits/rejected": -0.8594292998313904, + "logps/chosen": -2.1301493644714355, + "logps/rejected": -2.3678014278411865, + "loss": 1.3532, + "rewards/accuracies": 0.5859375, + "rewards/chosen": -4.260298728942871, + "rewards/margins": 0.4753049314022064, + "rewards/rejected": -4.735602855682373, + "step": 296 + }, + { + "epoch": 0.6279484638255699, + "grad_norm": 1.9318888187408447, + "learning_rate": 3.6463637388167875e-07, + "logits/chosen": -0.812870979309082, + "logits/rejected": -0.8393633961677551, + "logps/chosen": -2.0607728958129883, + "logps/rejected": -2.4457521438598633, + "loss": 1.2317, + "rewards/accuracies": 0.609375, + "rewards/chosen": -4.121545791625977, + "rewards/margins": 0.76995849609375, + "rewards/rejected": -4.891504287719727, + "step": 297 + }, + { + "epoch": 0.6300627684175751, + "grad_norm": 2.731139898300171, + "learning_rate": 3.610737621531781e-07, + "logits/chosen": -0.7860711216926575, + "logits/rejected": -0.8006534576416016, + "logps/chosen": -1.9324530363082886, + "logps/rejected": -2.2838711738586426, + "loss": 1.2986, + "rewards/accuracies": 0.65625, + "rewards/chosen": -3.864906072616577, + "rewards/margins": 0.7028359174728394, + "rewards/rejected": -4.567742347717285, + "step": 298 + }, + { + "epoch": 0.6321770730095805, + "grad_norm": 3.118441581726074, + "learning_rate": 3.575187773645112e-07, + "logits/chosen": -0.6946629285812378, + "logits/rejected": -0.6832380294799805, + "logps/chosen": -2.2569775581359863, + "logps/rejected": -2.6153128147125244, + "loss": 1.2166, + "rewards/accuracies": 0.6171875, + "rewards/chosen": -4.513955116271973, + "rewards/margins": 0.7166703343391418, + "rewards/rejected": -5.230625629425049, + "step": 299 + }, + { + "epoch": 0.6342913776015857, + "grad_norm": 4.998100757598877, + "learning_rate": 3.5397161468151214e-07, + "logits/chosen": -0.7972643375396729, + "logits/rejected": -0.7864660620689392, + "logps/chosen": -2.227022886276245, + "logps/rejected": -2.57175350189209, + "loss": 1.2796, + "rewards/accuracies": 0.6328125, + "rewards/chosen": -4.45404577255249, + "rewards/margins": 0.6894608736038208, + "rewards/rejected": -5.14350700378418, + "step": 300 + }, + { + "epoch": 0.6364056821935911, + "grad_norm": 6.259451866149902, + "learning_rate": 3.5043246884058777e-07, + "logits/chosen": -0.6282143592834473, + "logits/rejected": -0.6314865350723267, + "logps/chosen": -2.4372308254241943, + "logps/rejected": -2.8582205772399902, + "loss": 1.1592, + "rewards/accuracies": 0.625, + "rewards/chosen": -4.874461650848389, + "rewards/margins": 0.8419792056083679, + "rewards/rejected": -5.7164411544799805, + "step": 301 + }, + { + "epoch": 0.6385199867855963, + "grad_norm": 2.577531337738037, + "learning_rate": 3.4690153413802653e-07, + "logits/chosen": -0.658220648765564, + "logits/rejected": -0.6330516934394836, + "logps/chosen": -2.6647050380706787, + "logps/rejected": -3.1917996406555176, + "loss": 1.2609, + "rewards/accuracies": 0.671875, + "rewards/chosen": -5.329410076141357, + "rewards/margins": 1.0541892051696777, + "rewards/rejected": -6.383599281311035, + "step": 302 + }, + { + "epoch": 0.6406342913776016, + "grad_norm": 4.733935356140137, + "learning_rate": 3.4337900441933227e-07, + "logits/chosen": -0.5048555731773376, + "logits/rejected": -0.45112305879592896, + "logps/chosen": -2.5193920135498047, + "logps/rejected": -3.1279971599578857, + "loss": 1.0648, + "rewards/accuracies": 0.765625, + "rewards/chosen": -5.038784027099609, + "rewards/margins": 1.2172104120254517, + "rewards/rejected": -6.2559943199157715, + "step": 303 + }, + { + "epoch": 0.6427485959696069, + "grad_norm": 5.54962158203125, + "learning_rate": 3.3986507306858125e-07, + "logits/chosen": -0.5305406451225281, + "logits/rejected": -0.5246613025665283, + "logps/chosen": -2.8851962089538574, + "logps/rejected": -3.248018264770508, + "loss": 1.4329, + "rewards/accuracies": 0.625, + "rewards/chosen": -5.770392417907715, + "rewards/margins": 0.7256444692611694, + "rewards/rejected": -6.496036529541016, + "step": 304 + }, + { + "epoch": 0.6448629005616121, + "grad_norm": 2.827944278717041, + "learning_rate": 3.363599329978066e-07, + "logits/chosen": -0.4795135259628296, + "logits/rejected": -0.4911767244338989, + "logps/chosen": -3.0268373489379883, + "logps/rejected": -3.4411511421203613, + "loss": 1.4083, + "rewards/accuracies": 0.65625, + "rewards/chosen": -6.053674697875977, + "rewards/margins": 0.8286278247833252, + "rewards/rejected": -6.882302284240723, + "step": 305 + }, + { + "epoch": 0.6469772051536175, + "grad_norm": 5.35672664642334, + "learning_rate": 3.328637766364075e-07, + "logits/chosen": -0.4823904037475586, + "logits/rejected": -0.48555058240890503, + "logps/chosen": -2.990793466567993, + "logps/rejected": -3.529240846633911, + "loss": 1.1417, + "rewards/accuracies": 0.6796875, + "rewards/chosen": -5.981586933135986, + "rewards/margins": 1.0768945217132568, + "rewards/rejected": -7.058481693267822, + "step": 306 + }, + { + "epoch": 0.6490915097456227, + "grad_norm": 2.8072359561920166, + "learning_rate": 3.2937679592058396e-07, + "logits/chosen": -0.4903571605682373, + "logits/rejected": -0.46411609649658203, + "logps/chosen": -2.8665530681610107, + "logps/rejected": -3.542123556137085, + "loss": 1.2485, + "rewards/accuracies": 0.6640625, + "rewards/chosen": -5.7331061363220215, + "rewards/margins": 1.3511409759521484, + "rewards/rejected": -7.08424711227417, + "step": 307 + }, + { + "epoch": 0.651205814337628, + "grad_norm": 6.341434478759766, + "learning_rate": 3.2589918228280066e-07, + "logits/chosen": -0.4496378004550934, + "logits/rejected": -0.35389459133148193, + "logps/chosen": -2.8208916187286377, + "logps/rejected": -3.326601505279541, + "loss": 1.3089, + "rewards/accuracies": 0.6171875, + "rewards/chosen": -5.641783237457275, + "rewards/margins": 1.011419653892517, + "rewards/rejected": -6.653203010559082, + "step": 308 + }, + { + "epoch": 0.6533201189296333, + "grad_norm": 2.5416784286499023, + "learning_rate": 3.2243112664127723e-07, + "logits/chosen": -0.44504135847091675, + "logits/rejected": -0.42088568210601807, + "logps/chosen": -2.7710533142089844, + "logps/rejected": -3.4406185150146484, + "loss": 1.2213, + "rewards/accuracies": 0.6640625, + "rewards/chosen": -5.542106628417969, + "rewards/margins": 1.3391309976577759, + "rewards/rejected": -6.881237030029297, + "step": 309 + }, + { + "epoch": 0.6554344235216386, + "grad_norm": 4.573229789733887, + "learning_rate": 3.189728193895069e-07, + "logits/chosen": -0.31100764870643616, + "logits/rejected": -0.32552966475486755, + "logps/chosen": -3.099289655685425, + "logps/rejected": -3.5152204036712646, + "loss": 1.3571, + "rewards/accuracies": 0.6015625, + "rewards/chosen": -6.19857931137085, + "rewards/margins": 0.8318620324134827, + "rewards/rejected": -7.030440807342529, + "step": 310 + }, + { + "epoch": 0.6575487281136438, + "grad_norm": 3.7587928771972656, + "learning_rate": 3.155244503858041e-07, + "logits/chosen": -0.4225979447364807, + "logits/rejected": -0.43882372975349426, + "logps/chosen": -2.9082608222961426, + "logps/rejected": -3.2239482402801514, + "loss": 1.3415, + "rewards/accuracies": 0.6953125, + "rewards/chosen": -5.816521644592285, + "rewards/margins": 0.6313749551773071, + "rewards/rejected": -6.447896480560303, + "step": 311 + }, + { + "epoch": 0.6596630327056492, + "grad_norm": 5.79728889465332, + "learning_rate": 3.12086208942881e-07, + "logits/chosen": -0.48076939582824707, + "logits/rejected": -0.41990721225738525, + "logps/chosen": -2.7089650630950928, + "logps/rejected": -3.29990291595459, + "loss": 1.1423, + "rewards/accuracies": 0.6640625, + "rewards/chosen": -5.4179301261901855, + "rewards/margins": 1.181876540184021, + "rewards/rejected": -6.59980583190918, + "step": 312 + }, + { + "epoch": 0.6617773372976544, + "grad_norm": 7.405224800109863, + "learning_rate": 3.086582838174551e-07, + "logits/chosen": -0.48003631830215454, + "logits/rejected": -0.40571871399879456, + "logps/chosen": -2.53741455078125, + "logps/rejected": -3.0145747661590576, + "loss": 1.3247, + "rewards/accuracies": 0.609375, + "rewards/chosen": -5.0748291015625, + "rewards/margins": 0.9543203115463257, + "rewards/rejected": -6.029149532318115, + "step": 313 + }, + { + "epoch": 0.6638916418896598, + "grad_norm": 6.371465682983398, + "learning_rate": 3.052408631998863e-07, + "logits/chosen": -0.42537638545036316, + "logits/rejected": -0.39384835958480835, + "logps/chosen": -3.006593942642212, + "logps/rejected": -3.4665465354919434, + "loss": 1.2648, + "rewards/accuracies": 0.6328125, + "rewards/chosen": -6.013187885284424, + "rewards/margins": 0.919904887676239, + "rewards/rejected": -6.933093070983887, + "step": 314 + }, + { + "epoch": 0.666005946481665, + "grad_norm": 4.65411376953125, + "learning_rate": 3.018341347038453e-07, + "logits/chosen": -0.38848310708999634, + "logits/rejected": -0.3435167670249939, + "logps/chosen": -2.9562084674835205, + "logps/rejected": -3.5491316318511963, + "loss": 1.1353, + "rewards/accuracies": 0.6796875, + "rewards/chosen": -5.912416934967041, + "rewards/margins": 1.1858452558517456, + "rewards/rejected": -7.098263263702393, + "step": 315 + }, + { + "epoch": 0.6681202510736703, + "grad_norm": 5.089771747589111, + "learning_rate": 2.9843828535601397e-07, + "logits/chosen": -0.3452882170677185, + "logits/rejected": -0.29303884506225586, + "logps/chosen": -2.5367987155914307, + "logps/rejected": -3.172724723815918, + "loss": 1.2002, + "rewards/accuracies": 0.6875, + "rewards/chosen": -5.073597431182861, + "rewards/margins": 1.2718524932861328, + "rewards/rejected": -6.345449447631836, + "step": 316 + }, + { + "epoch": 0.6702345556656756, + "grad_norm": 4.480255603790283, + "learning_rate": 2.9505350158581697e-07, + "logits/chosen": -0.47401517629623413, + "logits/rejected": -0.45950815081596375, + "logps/chosen": -2.45076322555542, + "logps/rejected": -2.998079299926758, + "loss": 1.2545, + "rewards/accuracies": 0.6171875, + "rewards/chosen": -4.90152645111084, + "rewards/margins": 1.0946320295333862, + "rewards/rejected": -5.996158599853516, + "step": 317 + }, + { + "epoch": 0.6723488602576809, + "grad_norm": 3.6318399906158447, + "learning_rate": 2.916799692151884e-07, + "logits/chosen": -0.20774951577186584, + "logits/rejected": -0.21114808320999146, + "logps/chosen": -2.8932981491088867, + "logps/rejected": -3.613022565841675, + "loss": 1.1187, + "rewards/accuracies": 0.6953125, + "rewards/chosen": -5.786596298217773, + "rewards/margins": 1.4394491910934448, + "rewards/rejected": -7.22604513168335, + "step": 318 + }, + { + "epoch": 0.6744631648496862, + "grad_norm": 6.601771831512451, + "learning_rate": 2.883178734483692e-07, + "logits/chosen": -0.3821495473384857, + "logits/rejected": -0.35181915760040283, + "logps/chosen": -2.5047662258148193, + "logps/rejected": -3.074918270111084, + "loss": 1.1545, + "rewards/accuracies": 0.71875, + "rewards/chosen": -5.009532451629639, + "rewards/margins": 1.1403042078018188, + "rewards/rejected": -6.149836540222168, + "step": 319 + }, + { + "epoch": 0.6765774694416914, + "grad_norm": 3.077775716781616, + "learning_rate": 2.849673988617399e-07, + "logits/chosen": -0.4517952799797058, + "logits/rejected": -0.3880998194217682, + "logps/chosen": -2.5404443740844727, + "logps/rejected": -3.007855176925659, + "loss": 1.2441, + "rewards/accuracies": 0.6484375, + "rewards/chosen": -5.080888748168945, + "rewards/margins": 0.9348208904266357, + "rewards/rejected": -6.015710353851318, + "step": 320 + }, + { + "epoch": 0.6786917740336967, + "grad_norm": 4.130971908569336, + "learning_rate": 2.8162872939368674e-07, + "logits/chosen": -0.3455219566822052, + "logits/rejected": -0.3199109137058258, + "logps/chosen": -2.5115320682525635, + "logps/rejected": -3.0809438228607178, + "loss": 1.1814, + "rewards/accuracies": 0.71875, + "rewards/chosen": -5.023064136505127, + "rewards/margins": 1.1388237476348877, + "rewards/rejected": -6.1618876457214355, + "step": 321 + }, + { + "epoch": 0.680806078625702, + "grad_norm": 6.414750099182129, + "learning_rate": 2.783020483345057e-07, + "logits/chosen": -0.500693142414093, + "logits/rejected": -0.43053722381591797, + "logps/chosen": -2.627499580383301, + "logps/rejected": -3.176882266998291, + "loss": 1.2207, + "rewards/accuracies": 0.6015625, + "rewards/chosen": -5.254999160766602, + "rewards/margins": 1.0987658500671387, + "rewards/rejected": -6.353764533996582, + "step": 322 + }, + { + "epoch": 0.6829203832177073, + "grad_norm": 3.8955185413360596, + "learning_rate": 2.749875383163377e-07, + "logits/chosen": -0.3386150896549225, + "logits/rejected": -0.3456903100013733, + "logps/chosen": -2.5545601844787598, + "logps/rejected": -3.0574111938476562, + "loss": 1.2667, + "rewards/accuracies": 0.6796875, + "rewards/chosen": -5.1091203689575195, + "rewards/margins": 1.0057018995285034, + "rewards/rejected": -6.1148223876953125, + "step": 323 + }, + { + "epoch": 0.6850346878097126, + "grad_norm": 4.244959831237793, + "learning_rate": 2.7168538130314345e-07, + "logits/chosen": -0.4657687246799469, + "logits/rejected": -0.41878795623779297, + "logps/chosen": -2.3406989574432373, + "logps/rejected": -2.74613094329834, + "loss": 1.2982, + "rewards/accuracies": 0.640625, + "rewards/chosen": -4.681397914886475, + "rewards/margins": 0.8108637928962708, + "rewards/rejected": -5.49226188659668, + "step": 324 + }, + { + "epoch": 0.6871489924017179, + "grad_norm": 8.914139747619629, + "learning_rate": 2.683957585807136e-07, + "logits/chosen": -0.42120760679244995, + "logits/rejected": -0.34997111558914185, + "logps/chosen": -2.4362924098968506, + "logps/rejected": -2.8844237327575684, + "loss": 1.3185, + "rewards/accuracies": 0.6484375, + "rewards/chosen": -4.872584819793701, + "rewards/margins": 0.8962627649307251, + "rewards/rejected": -5.768847465515137, + "step": 325 + }, + { + "epoch": 0.6892632969937231, + "grad_norm": 2.8318073749542236, + "learning_rate": 2.651188507467161e-07, + "logits/chosen": -0.4435175657272339, + "logits/rejected": -0.43688836693763733, + "logps/chosen": -2.316673994064331, + "logps/rejected": -2.6802306175231934, + "loss": 1.2727, + "rewards/accuracies": 0.65625, + "rewards/chosen": -4.633347988128662, + "rewards/margins": 0.7271134853363037, + "rewards/rejected": -5.360461235046387, + "step": 326 + }, + { + "epoch": 0.6913776015857285, + "grad_norm": 9.15845012664795, + "learning_rate": 2.618548377007817e-07, + "logits/chosen": -0.4659804105758667, + "logits/rejected": -0.43525823950767517, + "logps/chosen": -2.3177073001861572, + "logps/rejected": -2.674837350845337, + "loss": 1.3204, + "rewards/accuracies": 0.65625, + "rewards/chosen": -4.6354146003723145, + "rewards/margins": 0.7142605781555176, + "rewards/rejected": -5.349674701690674, + "step": 327 + }, + { + "epoch": 0.6934919061777337, + "grad_norm": 8.41653060913086, + "learning_rate": 2.5860389863462763e-07, + "logits/chosen": -0.42244386672973633, + "logits/rejected": -0.3488731384277344, + "logps/chosen": -2.3063669204711914, + "logps/rejected": -2.8124496936798096, + "loss": 1.2621, + "rewards/accuracies": 0.671875, + "rewards/chosen": -4.612733840942383, + "rewards/margins": 1.0121653079986572, + "rewards/rejected": -5.624899387359619, + "step": 328 + }, + { + "epoch": 0.695606210769739, + "grad_norm": 8.558746337890625, + "learning_rate": 2.5536621202221986e-07, + "logits/chosen": -0.4081762433052063, + "logits/rejected": -0.3913821578025818, + "logps/chosen": -2.331026554107666, + "logps/rejected": -2.799482583999634, + "loss": 1.2435, + "rewards/accuracies": 0.59375, + "rewards/chosen": -4.662053108215332, + "rewards/margins": 0.9369123578071594, + "rewards/rejected": -5.598965167999268, + "step": 329 + }, + { + "epoch": 0.6977205153617443, + "grad_norm": 7.550137519836426, + "learning_rate": 2.521419556099754e-07, + "logits/chosen": -0.5334538221359253, + "logits/rejected": -0.5046267509460449, + "logps/chosen": -2.3662197589874268, + "logps/rejected": -2.8178446292877197, + "loss": 1.2172, + "rewards/accuracies": 0.6796875, + "rewards/chosen": -4.7324395179748535, + "rewards/margins": 0.9032500386238098, + "rewards/rejected": -5.6356892585754395, + "step": 330 + }, + { + "epoch": 0.6998348199537496, + "grad_norm": 4.939478397369385, + "learning_rate": 2.4893130640700364e-07, + "logits/chosen": -0.5103824138641357, + "logits/rejected": -0.49076637625694275, + "logps/chosen": -2.0302557945251465, + "logps/rejected": -2.4443471431732178, + "loss": 1.1939, + "rewards/accuracies": 0.65625, + "rewards/chosen": -4.060511589050293, + "rewards/margins": 0.8281831741333008, + "rewards/rejected": -4.8886942863464355, + "step": 331 + }, + { + "epoch": 0.7019491245457549, + "grad_norm": 5.584677219390869, + "learning_rate": 2.4573444067538985e-07, + "logits/chosen": -0.46035417914390564, + "logits/rejected": -0.4546043574810028, + "logps/chosen": -2.1907548904418945, + "logps/rejected": -2.4913454055786133, + "loss": 1.4253, + "rewards/accuracies": 0.6015625, + "rewards/chosen": -4.381509780883789, + "rewards/margins": 0.6011807322502136, + "rewards/rejected": -4.982690811157227, + "step": 332 + }, + { + "epoch": 0.7040634291377602, + "grad_norm": 3.398441791534424, + "learning_rate": 2.425515339205165e-07, + "logits/chosen": -0.5569466352462769, + "logits/rejected": -0.5756793022155762, + "logps/chosen": -2.037411689758301, + "logps/rejected": -2.3700244426727295, + "loss": 1.3425, + "rewards/accuracies": 0.5703125, + "rewards/chosen": -4.074823379516602, + "rewards/margins": 0.6652255654335022, + "rewards/rejected": -4.740048885345459, + "step": 333 + }, + { + "epoch": 0.7061777337297654, + "grad_norm": 8.54529094696045, + "learning_rate": 2.3938276088143e-07, + "logits/chosen": -0.5746757388114929, + "logits/rejected": -0.5874296426773071, + "logps/chosen": -2.1479601860046387, + "logps/rejected": -2.584625244140625, + "loss": 1.2366, + "rewards/accuracies": 0.71875, + "rewards/chosen": -4.295920372009277, + "rewards/margins": 0.8733301758766174, + "rewards/rejected": -5.16925048828125, + "step": 334 + }, + { + "epoch": 0.7082920383217707, + "grad_norm": 5.141815662384033, + "learning_rate": 2.362282955212473e-07, + "logits/chosen": -0.6492913961410522, + "logits/rejected": -0.5812432765960693, + "logps/chosen": -1.9753435850143433, + "logps/rejected": -2.340383768081665, + "loss": 1.2197, + "rewards/accuracies": 0.640625, + "rewards/chosen": -3.9506871700286865, + "rewards/margins": 0.7300805449485779, + "rewards/rejected": -4.68076753616333, + "step": 335 + }, + { + "epoch": 0.710406342913776, + "grad_norm": 5.991698265075684, + "learning_rate": 2.3308831101760483e-07, + "logits/chosen": -0.6887751221656799, + "logits/rejected": -0.6923843622207642, + "logps/chosen": -1.577715277671814, + "logps/rejected": -1.861379623413086, + "loss": 1.2608, + "rewards/accuracies": 0.640625, + "rewards/chosen": -3.155430555343628, + "rewards/margins": 0.5673283338546753, + "rewards/rejected": -3.722759246826172, + "step": 336 + }, + { + "epoch": 0.7125206475057813, + "grad_norm": 1.5719850063323975, + "learning_rate": 2.2996297975315097e-07, + "logits/chosen": -0.6095813512802124, + "logits/rejected": -0.5842909216880798, + "logps/chosen": -1.6973541975021362, + "logps/rejected": -2.1261086463928223, + "loss": 1.2424, + "rewards/accuracies": 0.609375, + "rewards/chosen": -3.3947083950042725, + "rewards/margins": 0.857509195804596, + "rewards/rejected": -4.2522172927856445, + "step": 337 + }, + { + "epoch": 0.7146349520977866, + "grad_norm": 4.785243511199951, + "learning_rate": 2.2685247330608414e-07, + "logits/chosen": -0.7062411308288574, + "logits/rejected": -0.6849475502967834, + "logps/chosen": -1.6659798622131348, + "logps/rejected": -1.980202555656433, + "loss": 1.2512, + "rewards/accuracies": 0.6171875, + "rewards/chosen": -3.3319597244262695, + "rewards/margins": 0.6284454464912415, + "rewards/rejected": -3.960405111312866, + "step": 338 + }, + { + "epoch": 0.7167492566897918, + "grad_norm": 4.3183674812316895, + "learning_rate": 2.2375696244073123e-07, + "logits/chosen": -0.6655697822570801, + "logits/rejected": -0.6642571687698364, + "logps/chosen": -1.615012764930725, + "logps/rejected": -1.9022549390792847, + "loss": 1.398, + "rewards/accuracies": 0.625, + "rewards/chosen": -3.23002552986145, + "rewards/margins": 0.5744845867156982, + "rewards/rejected": -3.8045098781585693, + "step": 339 + }, + { + "epoch": 0.7188635612817972, + "grad_norm": 3.458740472793579, + "learning_rate": 2.2067661709817382e-07, + "logits/chosen": -0.6138105988502502, + "logits/rejected": -0.6241220235824585, + "logps/chosen": -1.5244299173355103, + "logps/rejected": -1.8252849578857422, + "loss": 1.2257, + "rewards/accuracies": 0.5859375, + "rewards/chosen": -3.0488598346710205, + "rewards/margins": 0.6017097234725952, + "rewards/rejected": -3.6505699157714844, + "step": 340 + }, + { + "epoch": 0.7209778658738024, + "grad_norm": 3.3990859985351562, + "learning_rate": 2.1761160638691838e-07, + "logits/chosen": -0.596839964389801, + "logits/rejected": -0.5929630398750305, + "logps/chosen": -1.4333155155181885, + "logps/rejected": -1.820554494857788, + "loss": 1.1124, + "rewards/accuracies": 0.6953125, + "rewards/chosen": -2.866631031036377, + "rewards/margins": 0.7744779586791992, + "rewards/rejected": -3.641108989715576, + "step": 341 + }, + { + "epoch": 0.7230921704658078, + "grad_norm": 2.742016315460205, + "learning_rate": 2.1456209857361246e-07, + "logits/chosen": -0.6483213901519775, + "logits/rejected": -0.6418218612670898, + "logps/chosen": -1.4174959659576416, + "logps/rejected": -1.831233263015747, + "loss": 1.1372, + "rewards/accuracies": 0.703125, + "rewards/chosen": -2.834991931915283, + "rewards/margins": 0.8274745941162109, + "rewards/rejected": -3.662466526031494, + "step": 342 + }, + { + "epoch": 0.725206475057813, + "grad_norm": 2.5489015579223633, + "learning_rate": 2.1152826107380651e-07, + "logits/chosen": -0.599895179271698, + "logits/rejected": -0.6154446005821228, + "logps/chosen": -1.4996072053909302, + "logps/rejected": -1.7961615324020386, + "loss": 1.2288, + "rewards/accuracies": 0.6640625, + "rewards/chosen": -2.9992144107818604, + "rewards/margins": 0.5931087732315063, + "rewards/rejected": -3.592323064804077, + "step": 343 + }, + { + "epoch": 0.7273207796498183, + "grad_norm": 2.8836190700531006, + "learning_rate": 2.0851026044276405e-07, + "logits/chosen": -0.7359989285469055, + "logits/rejected": -0.7111036777496338, + "logps/chosen": -1.32615065574646, + "logps/rejected": -1.6067696809768677, + "loss": 1.2088, + "rewards/accuracies": 0.65625, + "rewards/chosen": -2.65230131149292, + "rewards/margins": 0.5612384080886841, + "rewards/rejected": -3.2135393619537354, + "step": 344 + }, + { + "epoch": 0.7294350842418236, + "grad_norm": 3.1838135719299316, + "learning_rate": 2.0550826236631596e-07, + "logits/chosen": -0.6709272265434265, + "logits/rejected": -0.6708023548126221, + "logps/chosen": -1.2859303951263428, + "logps/rejected": -1.6929675340652466, + "loss": 1.1446, + "rewards/accuracies": 0.6875, + "rewards/chosen": -2.5718607902526855, + "rewards/margins": 0.8140743374824524, + "rewards/rejected": -3.385935068130493, + "step": 345 + }, + { + "epoch": 0.7315493888338289, + "grad_norm": 2.4209675788879395, + "learning_rate": 2.025224316517663e-07, + "logits/chosen": -0.7540403604507446, + "logits/rejected": -0.7601196765899658, + "logps/chosen": -1.3634543418884277, + "logps/rejected": -1.6112797260284424, + "loss": 1.2561, + "rewards/accuracies": 0.625, + "rewards/chosen": -2.7269086837768555, + "rewards/margins": 0.4956510066986084, + "rewards/rejected": -3.2225594520568848, + "step": 346 + }, + { + "epoch": 0.7336636934258342, + "grad_norm": 5.405437469482422, + "learning_rate": 1.9955293221884402e-07, + "logits/chosen": -0.7241419553756714, + "logits/rejected": -0.7224253416061401, + "logps/chosen": -1.2650585174560547, + "logps/rejected": -1.639666199684143, + "loss": 1.1565, + "rewards/accuracies": 0.6875, + "rewards/chosen": -2.5301170349121094, + "rewards/margins": 0.7492151856422424, + "rewards/rejected": -3.279332399368286, + "step": 347 + }, + { + "epoch": 0.7357779980178395, + "grad_norm": 1.5863631963729858, + "learning_rate": 1.9659992709070344e-07, + "logits/chosen": -0.7479431629180908, + "logits/rejected": -0.7219806909561157, + "logps/chosen": -1.294840931892395, + "logps/rejected": -1.6082017421722412, + "loss": 1.1693, + "rewards/accuracies": 0.640625, + "rewards/chosen": -2.58968186378479, + "rewards/margins": 0.6267215013504028, + "rewards/rejected": -3.2164034843444824, + "step": 348 + }, + { + "epoch": 0.7378923026098447, + "grad_norm": 1.7051454782485962, + "learning_rate": 1.936635783849742e-07, + "logits/chosen": -0.6940132975578308, + "logits/rejected": -0.7377297878265381, + "logps/chosen": -1.1897408962249756, + "logps/rejected": -1.631073236465454, + "loss": 1.1069, + "rewards/accuracies": 0.6953125, + "rewards/chosen": -2.379481792449951, + "rewards/margins": 0.8826643228530884, + "rewards/rejected": -3.262146472930908, + "step": 349 + }, + { + "epoch": 0.74000660720185, + "grad_norm": 2.704514980316162, + "learning_rate": 1.907440473048626e-07, + "logits/chosen": -0.6926394104957581, + "logits/rejected": -0.7064180374145508, + "logps/chosen": -1.1691362857818604, + "logps/rejected": -1.511006236076355, + "loss": 1.1541, + "rewards/accuracies": 0.6640625, + "rewards/chosen": -2.3382725715637207, + "rewards/margins": 0.6837398409843445, + "rewards/rejected": -3.02201247215271, + "step": 350 + }, + { + "epoch": 0.7421209117938553, + "grad_norm": 2.3685505390167236, + "learning_rate": 1.8784149413030004e-07, + "logits/chosen": -0.7785338759422302, + "logits/rejected": -0.7802280187606812, + "logps/chosen": -1.267012119293213, + "logps/rejected": -1.5235991477966309, + "loss": 1.177, + "rewards/accuracies": 0.65625, + "rewards/chosen": -2.534024238586426, + "rewards/margins": 0.5131738781929016, + "rewards/rejected": -3.0471982955932617, + "step": 351 + }, + { + "epoch": 0.7442352163858605, + "grad_norm": 2.8642280101776123, + "learning_rate": 1.849560782091445e-07, + "logits/chosen": -0.8269493579864502, + "logits/rejected": -0.8431333899497986, + "logps/chosen": -1.228893518447876, + "logps/rejected": -1.5784943103790283, + "loss": 1.1764, + "rewards/accuracies": 0.640625, + "rewards/chosen": -2.457787036895752, + "rewards/margins": 0.6992017030715942, + "rewards/rejected": -3.1569886207580566, + "step": 352 + }, + { + "epoch": 0.7463495209778659, + "grad_norm": 4.742166996002197, + "learning_rate": 1.8208795794843246e-07, + "logits/chosen": -0.764488160610199, + "logits/rejected": -0.7553139925003052, + "logps/chosen": -1.3095338344573975, + "logps/rejected": -1.6771752834320068, + "loss": 1.0957, + "rewards/accuracies": 0.7109375, + "rewards/chosen": -2.619067668914795, + "rewards/margins": 0.7352830171585083, + "rewards/rejected": -3.3543505668640137, + "step": 353 + }, + { + "epoch": 0.7484638255698711, + "grad_norm": 3.543769359588623, + "learning_rate": 1.7923729080568239e-07, + "logits/chosen": -0.7355642914772034, + "logits/rejected": -0.7744429707527161, + "logps/chosen": -1.3419017791748047, + "logps/rejected": -1.591749668121338, + "loss": 1.2579, + "rewards/accuracies": 0.6484375, + "rewards/chosen": -2.6838035583496094, + "rewards/margins": 0.4996955990791321, + "rewards/rejected": -3.183499336242676, + "step": 354 + }, + { + "epoch": 0.7505781301618765, + "grad_norm": 4.187947750091553, + "learning_rate": 1.764042332802506e-07, + "logits/chosen": -0.7009099721908569, + "logits/rejected": -0.6947562098503113, + "logps/chosen": -1.3167665004730225, + "logps/rejected": -1.640596866607666, + "loss": 1.2269, + "rewards/accuracies": 0.6171875, + "rewards/chosen": -2.633533000946045, + "rewards/margins": 0.6476608514785767, + "rewards/rejected": -3.281193733215332, + "step": 355 + }, + { + "epoch": 0.7526924347538817, + "grad_norm": 1.7813458442687988, + "learning_rate": 1.7358894090473924e-07, + "logits/chosen": -0.7276792526245117, + "logits/rejected": -0.7536065578460693, + "logps/chosen": -1.401429295539856, + "logps/rejected": -1.7458314895629883, + "loss": 1.1934, + "rewards/accuracies": 0.6875, + "rewards/chosen": -2.802858591079712, + "rewards/margins": 0.6888045072555542, + "rewards/rejected": -3.4916629791259766, + "step": 356 + }, + { + "epoch": 0.754806739345887, + "grad_norm": 2.3192296028137207, + "learning_rate": 1.7079156823645801e-07, + "logits/chosen": -0.6756848096847534, + "logits/rejected": -0.6988381743431091, + "logps/chosen": -1.36654531955719, + "logps/rejected": -1.6672351360321045, + "loss": 1.1928, + "rewards/accuracies": 0.671875, + "rewards/chosen": -2.73309063911438, + "rewards/margins": 0.6013798117637634, + "rewards/rejected": -3.334470272064209, + "step": 357 + }, + { + "epoch": 0.7569210439378923, + "grad_norm": 2.7722420692443848, + "learning_rate": 1.6801226884893893e-07, + "logits/chosen": -0.6857397556304932, + "logits/rejected": -0.7169467806816101, + "logps/chosen": -1.4047114849090576, + "logps/rejected": -1.733205795288086, + "loss": 1.16, + "rewards/accuracies": 0.6796875, + "rewards/chosen": -2.8094229698181152, + "rewards/margins": 0.6569885015487671, + "rewards/rejected": -3.466411590576172, + "step": 358 + }, + { + "epoch": 0.7590353485298976, + "grad_norm": 6.300495624542236, + "learning_rate": 1.6525119532350506e-07, + "logits/chosen": -0.7457281947135925, + "logits/rejected": -0.7319377660751343, + "logps/chosen": -1.282365083694458, + "logps/rejected": -1.6675825119018555, + "loss": 1.0742, + "rewards/accuracies": 0.7265625, + "rewards/chosen": -2.564730167388916, + "rewards/margins": 0.7704350352287292, + "rewards/rejected": -3.335165023803711, + "step": 359 + }, + { + "epoch": 0.7611496531219029, + "grad_norm": 3.5068228244781494, + "learning_rate": 1.6250849924089482e-07, + "logits/chosen": -0.7112680077552795, + "logits/rejected": -0.7166794538497925, + "logps/chosen": -1.3996254205703735, + "logps/rejected": -1.6635833978652954, + "loss": 1.2438, + "rewards/accuracies": 0.625, + "rewards/chosen": -2.799250841140747, + "rewards/margins": 0.5279159545898438, + "rewards/rejected": -3.327166795730591, + "step": 360 + }, + { + "epoch": 0.7632639577139082, + "grad_norm": 1.421538233757019, + "learning_rate": 1.5978433117293883e-07, + "logits/chosen": -0.7009663581848145, + "logits/rejected": -0.6878695487976074, + "logps/chosen": -1.4174691438674927, + "logps/rejected": -1.802457332611084, + "loss": 1.0885, + "rewards/accuracies": 0.7265625, + "rewards/chosen": -2.8349382877349854, + "rewards/margins": 0.7699761986732483, + "rewards/rejected": -3.604914665222168, + "step": 361 + }, + { + "epoch": 0.7653782623059134, + "grad_norm": 3.2645766735076904, + "learning_rate": 1.5707884067429471e-07, + "logits/chosen": -0.6865817904472351, + "logits/rejected": -0.7084690928459167, + "logps/chosen": -1.377517819404602, + "logps/rejected": -1.7079989910125732, + "loss": 1.2371, + "rewards/accuracies": 0.640625, + "rewards/chosen": -2.755035638809204, + "rewards/margins": 0.660962700843811, + "rewards/rejected": -3.4159979820251465, + "step": 362 + }, + { + "epoch": 0.7674925668979188, + "grad_norm": 1.973783254623413, + "learning_rate": 1.5439217627423695e-07, + "logits/chosen": -0.7317283153533936, + "logits/rejected": -0.7571225166320801, + "logps/chosen": -1.63040030002594, + "logps/rejected": -2.027442216873169, + "loss": 1.1614, + "rewards/accuracies": 0.671875, + "rewards/chosen": -3.26080060005188, + "rewards/margins": 0.7940834760665894, + "rewards/rejected": -4.054884433746338, + "step": 363 + }, + { + "epoch": 0.769606871489924, + "grad_norm": 4.545448303222656, + "learning_rate": 1.5172448546850163e-07, + "logits/chosen": -0.6746503710746765, + "logits/rejected": -0.7073549628257751, + "logps/chosen": -1.321073055267334, + "logps/rejected": -1.6741642951965332, + "loss": 1.1609, + "rewards/accuracies": 0.6796875, + "rewards/chosen": -2.642146110534668, + "rewards/margins": 0.7061826586723328, + "rewards/rejected": -3.3483285903930664, + "step": 364 + }, + { + "epoch": 0.7717211760819292, + "grad_norm": 8.678997039794922, + "learning_rate": 1.490759147111894e-07, + "logits/chosen": -0.6089351773262024, + "logits/rejected": -0.6172072291374207, + "logps/chosen": -1.6598318815231323, + "logps/rejected": -1.9151239395141602, + "loss": 1.2762, + "rewards/accuracies": 0.6484375, + "rewards/chosen": -3.3196637630462646, + "rewards/margins": 0.5105838775634766, + "rewards/rejected": -3.8302478790283203, + "step": 365 + }, + { + "epoch": 0.7738354806739346, + "grad_norm": 3.29367733001709, + "learning_rate": 1.4644660940672627e-07, + "logits/chosen": -0.6255152821540833, + "logits/rejected": -0.6178345680236816, + "logps/chosen": -1.7635339498519897, + "logps/rejected": -2.02409029006958, + "loss": 1.4469, + "rewards/accuracies": 0.6171875, + "rewards/chosen": -3.5270678997039795, + "rewards/margins": 0.5211121439933777, + "rewards/rejected": -4.04818058013916, + "step": 366 + }, + { + "epoch": 0.7759497852659398, + "grad_norm": 3.500715732574463, + "learning_rate": 1.438367139018796e-07, + "logits/chosen": -0.6738446354866028, + "logits/rejected": -0.671849250793457, + "logps/chosen": -1.603959560394287, + "logps/rejected": -2.140998363494873, + "loss": 0.9771, + "rewards/accuracies": 0.703125, + "rewards/chosen": -3.207919120788574, + "rewards/margins": 1.0740783214569092, + "rewards/rejected": -4.281996726989746, + "step": 367 + }, + { + "epoch": 0.7780640898579452, + "grad_norm": 2.8842501640319824, + "learning_rate": 1.412463714778343e-07, + "logits/chosen": -0.6544129252433777, + "logits/rejected": -0.6667245030403137, + "logps/chosen": -1.7409751415252686, + "logps/rejected": -2.1441538333892822, + "loss": 1.1043, + "rewards/accuracies": 0.7265625, + "rewards/chosen": -3.481950283050537, + "rewards/margins": 0.806357741355896, + "rewards/rejected": -4.2883076667785645, + "step": 368 + }, + { + "epoch": 0.7801783944499504, + "grad_norm": 3.7606077194213867, + "learning_rate": 1.3867572434232728e-07, + "logits/chosen": -0.6620441675186157, + "logits/rejected": -0.6536539793014526, + "logps/chosen": -1.6755543947219849, + "logps/rejected": -2.012425184249878, + "loss": 1.2249, + "rewards/accuracies": 0.6796875, + "rewards/chosen": -3.3511087894439697, + "rewards/margins": 0.6737421154975891, + "rewards/rejected": -4.024850368499756, + "step": 369 + }, + { + "epoch": 0.7822926990419558, + "grad_norm": 3.284456729888916, + "learning_rate": 1.3612491362183887e-07, + "logits/chosen": -0.6353476047515869, + "logits/rejected": -0.6363587975502014, + "logps/chosen": -1.6001538038253784, + "logps/rejected": -2.0670526027679443, + "loss": 1.0746, + "rewards/accuracies": 0.6796875, + "rewards/chosen": -3.200307607650757, + "rewards/margins": 0.9337971210479736, + "rewards/rejected": -4.134105205535889, + "step": 370 + }, + { + "epoch": 0.784407003633961, + "grad_norm": 1.9063444137573242, + "learning_rate": 1.3359407935384642e-07, + "logits/chosen": -0.6120063662528992, + "logits/rejected": -0.5794797539710999, + "logps/chosen": -1.4489734172821045, + "logps/rejected": -1.9216854572296143, + "loss": 1.0928, + "rewards/accuracies": 0.71875, + "rewards/chosen": -2.897946834564209, + "rewards/margins": 0.9454240798950195, + "rewards/rejected": -3.8433709144592285, + "step": 371 + }, + { + "epoch": 0.7865213082259663, + "grad_norm": 6.973724842071533, + "learning_rate": 1.3108336047913633e-07, + "logits/chosen": -0.6082984209060669, + "logits/rejected": -0.6162828207015991, + "logps/chosen": -1.7623229026794434, + "logps/rejected": -2.239130735397339, + "loss": 1.2665, + "rewards/accuracies": 0.59375, + "rewards/chosen": -3.5246458053588867, + "rewards/margins": 0.9536150693893433, + "rewards/rejected": -4.478261470794678, + "step": 372 + }, + { + "epoch": 0.7886356128179716, + "grad_norm": 3.874128580093384, + "learning_rate": 1.2859289483417557e-07, + "logits/chosen": -0.5540960431098938, + "logits/rejected": -0.5091680884361267, + "logps/chosen": -1.85587739944458, + "logps/rejected": -2.3959312438964844, + "loss": 1.0672, + "rewards/accuracies": 0.6796875, + "rewards/chosen": -3.71175479888916, + "rewards/margins": 1.0801074504852295, + "rewards/rejected": -4.791862487792969, + "step": 373 + }, + { + "epoch": 0.7907499174099769, + "grad_norm": 13.771154403686523, + "learning_rate": 1.261228191435445e-07, + "logits/chosen": -0.599963903427124, + "logits/rejected": -0.5765703916549683, + "logps/chosen": -1.7974251508712769, + "logps/rejected": -2.2272088527679443, + "loss": 1.1994, + "rewards/accuracies": 0.640625, + "rewards/chosen": -3.5948503017425537, + "rewards/margins": 0.8595672249794006, + "rewards/rejected": -4.454417705535889, + "step": 374 + }, + { + "epoch": 0.7928642220019821, + "grad_norm": 2.5084969997406006, + "learning_rate": 1.2367326901243214e-07, + "logits/chosen": -0.5945304036140442, + "logits/rejected": -0.6021737456321716, + "logps/chosen": -1.9855573177337646, + "logps/rejected": -2.3953022956848145, + "loss": 1.2576, + "rewards/accuracies": 0.6640625, + "rewards/chosen": -3.9711146354675293, + "rewards/margins": 0.8194906711578369, + "rewards/rejected": -4.790604591369629, + "step": 375 + }, + { + "epoch": 0.7949785265939875, + "grad_norm": 4.571497440338135, + "learning_rate": 1.2124437891918993e-07, + "logits/chosen": -0.5888144373893738, + "logits/rejected": -0.5575076937675476, + "logps/chosen": -1.8334908485412598, + "logps/rejected": -2.153212070465088, + "loss": 1.2104, + "rewards/accuracies": 0.640625, + "rewards/chosen": -3.6669816970825195, + "rewards/margins": 0.639442503452301, + "rewards/rejected": -4.306424140930176, + "step": 376 + }, + { + "epoch": 0.7970928311859927, + "grad_norm": 5.023235321044922, + "learning_rate": 1.1883628220795005e-07, + "logits/chosen": -0.632038414478302, + "logits/rejected": -0.6368271708488464, + "logps/chosen": -1.8573570251464844, + "logps/rejected": -2.291320323944092, + "loss": 1.1719, + "rewards/accuracies": 0.6640625, + "rewards/chosen": -3.7147140502929688, + "rewards/margins": 0.8679270148277283, + "rewards/rejected": -4.582640647888184, + "step": 377 + }, + { + "epoch": 0.7992071357779981, + "grad_norm": 4.98567533493042, + "learning_rate": 1.1644911108130434e-07, + "logits/chosen": -0.5647228360176086, + "logits/rejected": -0.5541558265686035, + "logps/chosen": -1.8232372999191284, + "logps/rejected": -2.2992348670959473, + "loss": 1.1476, + "rewards/accuracies": 0.6953125, + "rewards/chosen": -3.646474599838257, + "rewards/margins": 0.9519950747489929, + "rewards/rejected": -4.5984697341918945, + "step": 378 + }, + { + "epoch": 0.8013214403700033, + "grad_norm": 9.514540672302246, + "learning_rate": 1.1408299659304682e-07, + "logits/chosen": -0.5385195016860962, + "logits/rejected": -0.5475942492485046, + "logps/chosen": -2.077877998352051, + "logps/rejected": -2.4877052307128906, + "loss": 1.1605, + "rewards/accuracies": 0.7265625, + "rewards/chosen": -4.155755996704102, + "rewards/margins": 0.8196545243263245, + "rewards/rejected": -4.975410461425781, + "step": 379 + }, + { + "epoch": 0.8034357449620085, + "grad_norm": 7.652558326721191, + "learning_rate": 1.1173806864097884e-07, + "logits/chosen": -0.5651392936706543, + "logits/rejected": -0.5097556114196777, + "logps/chosen": -1.9452521800994873, + "logps/rejected": -2.376047134399414, + "loss": 1.2004, + "rewards/accuracies": 0.6796875, + "rewards/chosen": -3.8905043601989746, + "rewards/margins": 0.8615895509719849, + "rewards/rejected": -4.752094268798828, + "step": 380 + }, + { + "epoch": 0.8055500495540139, + "grad_norm": 6.184218406677246, + "learning_rate": 1.0941445595977766e-07, + "logits/chosen": -0.5738644599914551, + "logits/rejected": -0.570101797580719, + "logps/chosen": -2.0233359336853027, + "logps/rejected": -2.5829384326934814, + "loss": 1.1539, + "rewards/accuracies": 0.71875, + "rewards/chosen": -4.0466718673706055, + "rewards/margins": 1.1192048788070679, + "rewards/rejected": -5.165876865386963, + "step": 381 + }, + { + "epoch": 0.8076643541460191, + "grad_norm": 4.697547435760498, + "learning_rate": 1.0711228611392936e-07, + "logits/chosen": -0.5766915082931519, + "logits/rejected": -0.5619411468505859, + "logps/chosen": -2.0546395778656006, + "logps/rejected": -2.4459054470062256, + "loss": 1.2723, + "rewards/accuracies": 0.59375, + "rewards/chosen": -4.109279155731201, + "rewards/margins": 0.7825320959091187, + "rewards/rejected": -4.891810894012451, + "step": 382 + }, + { + "epoch": 0.8097786587380245, + "grad_norm": 5.595128536224365, + "learning_rate": 1.0483168549072518e-07, + "logits/chosen": -0.6808648109436035, + "logits/rejected": -0.6518751382827759, + "logps/chosen": -1.9909974336624146, + "logps/rejected": -2.3775596618652344, + "loss": 1.2501, + "rewards/accuracies": 0.6640625, + "rewards/chosen": -3.981994867324829, + "rewards/margins": 0.7731241583824158, + "rewards/rejected": -4.755119323730469, + "step": 383 + }, + { + "epoch": 0.8118929633300297, + "grad_norm": 3.6460607051849365, + "learning_rate": 1.0257277929332331e-07, + "logits/chosen": -0.6901826858520508, + "logits/rejected": -0.703309953212738, + "logps/chosen": -1.9317903518676758, + "logps/rejected": -2.322279930114746, + "loss": 1.1945, + "rewards/accuracies": 0.65625, + "rewards/chosen": -3.8635807037353516, + "rewards/margins": 0.780979335308075, + "rewards/rejected": -4.644559860229492, + "step": 384 + }, + { + "epoch": 0.814007267922035, + "grad_norm": 8.366463661193848, + "learning_rate": 1.0033569153387561e-07, + "logits/chosen": -0.5720599293708801, + "logits/rejected": -0.5666248798370361, + "logps/chosen": -1.9946173429489136, + "logps/rejected": -2.3951826095581055, + "loss": 1.3349, + "rewards/accuracies": 0.546875, + "rewards/chosen": -3.989234685897827, + "rewards/margins": 0.8011305332183838, + "rewards/rejected": -4.790365219116211, + "step": 385 + }, + { + "epoch": 0.8161215725140403, + "grad_norm": 1.8578377962112427, + "learning_rate": 9.812054502671834e-08, + "logits/chosen": -0.6122175455093384, + "logits/rejected": -0.5665942430496216, + "logps/chosen": -2.1414878368377686, + "logps/rejected": -2.646432399749756, + "loss": 1.1834, + "rewards/accuracies": 0.7109375, + "rewards/chosen": -4.282975673675537, + "rewards/margins": 1.009889006614685, + "rewards/rejected": -5.292864799499512, + "step": 386 + }, + { + "epoch": 0.8182358771060456, + "grad_norm": 4.7323408126831055, + "learning_rate": 9.592746138163093e-08, + "logits/chosen": -0.5390607118606567, + "logits/rejected": -0.5227072834968567, + "logps/chosen": -2.1249067783355713, + "logps/rejected": -2.688115119934082, + "loss": 1.2211, + "rewards/accuracies": 0.671875, + "rewards/chosen": -4.249813556671143, + "rewards/margins": 1.1264164447784424, + "rewards/rejected": -5.376230239868164, + "step": 387 + }, + { + "epoch": 0.8203501816980509, + "grad_norm": 2.5557284355163574, + "learning_rate": 9.375656099715934e-08, + "logits/chosen": -0.5654515027999878, + "logits/rejected": -0.5636597275733948, + "logps/chosen": -2.126107692718506, + "logps/rejected": -2.606684684753418, + "loss": 1.1995, + "rewards/accuracies": 0.6484375, + "rewards/chosen": -4.252215385437012, + "rewards/margins": 0.9611539244651794, + "rewards/rejected": -5.213369369506836, + "step": 388 + }, + { + "epoch": 0.8224644862900562, + "grad_norm": 4.177574634552002, + "learning_rate": 9.160796305400609e-08, + "logits/chosen": -0.6432445645332336, + "logits/rejected": -0.6587055921554565, + "logps/chosen": -2.0785441398620605, + "logps/rejected": -2.4507219791412354, + "loss": 1.2339, + "rewards/accuracies": 0.6328125, + "rewards/chosen": -4.157088279724121, + "rewards/margins": 0.7443561553955078, + "rewards/rejected": -4.901443958282471, + "step": 389 + }, + { + "epoch": 0.8245787908820614, + "grad_norm": 5.901131629943848, + "learning_rate": 8.9481785508487e-08, + "logits/chosen": -0.588135302066803, + "logits/rejected": -0.5850880742073059, + "logps/chosen": -2.225466251373291, + "logps/rejected": -2.638160467147827, + "loss": 1.2255, + "rewards/accuracies": 0.6328125, + "rewards/chosen": -4.450932502746582, + "rewards/margins": 0.8253881335258484, + "rewards/rejected": -5.276320934295654, + "step": 390 + }, + { + "epoch": 0.8266930954740668, + "grad_norm": 2.727555751800537, + "learning_rate": 8.737814508605673e-08, + "logits/chosen": -0.5863823294639587, + "logits/rejected": -0.590294599533081, + "logps/chosen": -1.9851064682006836, + "logps/rejected": -2.579831600189209, + "loss": 1.0113, + "rewards/accuracies": 0.6953125, + "rewards/chosen": -3.970212936401367, + "rewards/margins": 1.1894500255584717, + "rewards/rejected": -5.159663200378418, + "step": 391 + }, + { + "epoch": 0.828807400066072, + "grad_norm": 9.048048973083496, + "learning_rate": 8.529715727489912e-08, + "logits/chosen": -0.5600543022155762, + "logits/rejected": -0.5537065267562866, + "logps/chosen": -1.9846975803375244, + "logps/rejected": -2.2676990032196045, + "loss": 1.3045, + "rewards/accuracies": 0.6328125, + "rewards/chosen": -3.969395160675049, + "rewards/margins": 0.5660032629966736, + "rewards/rejected": -4.535398006439209, + "step": 392 + }, + { + "epoch": 0.8309217046580774, + "grad_norm": 3.4390201568603516, + "learning_rate": 8.323893631958806e-08, + "logits/chosen": -0.6335893273353577, + "logits/rejected": -0.6190727949142456, + "logps/chosen": -1.908363938331604, + "logps/rejected": -2.510305166244507, + "loss": 1.0262, + "rewards/accuracies": 0.6875, + "rewards/chosen": -3.816727876663208, + "rewards/margins": 1.2038825750350952, + "rewards/rejected": -5.020610332489014, + "step": 393 + }, + { + "epoch": 0.8330360092500826, + "grad_norm": 5.347372531890869, + "learning_rate": 8.120359521481501e-08, + "logits/chosen": -0.6408874988555908, + "logits/rejected": -0.643690288066864, + "logps/chosen": -2.019606828689575, + "logps/rejected": -2.3068103790283203, + "loss": 1.3028, + "rewards/accuracies": 0.640625, + "rewards/chosen": -4.03921365737915, + "rewards/margins": 0.574406623840332, + "rewards/rejected": -4.613620758056641, + "step": 394 + }, + { + "epoch": 0.8351503138420878, + "grad_norm": 2.2186920642852783, + "learning_rate": 7.9191245699186e-08, + "logits/chosen": -0.7156819105148315, + "logits/rejected": -0.6814436316490173, + "logps/chosen": -2.108549118041992, + "logps/rejected": -2.608646869659424, + "loss": 1.2948, + "rewards/accuracies": 0.6328125, + "rewards/chosen": -4.217098236083984, + "rewards/margins": 1.0001959800720215, + "rewards/rejected": -5.217293739318848, + "step": 395 + }, + { + "epoch": 0.8372646184340932, + "grad_norm": 2.6448726654052734, + "learning_rate": 7.720199824908691e-08, + "logits/chosen": -0.5753149390220642, + "logits/rejected": -0.6065633296966553, + "logps/chosen": -2.2337100505828857, + "logps/rejected": -2.6677160263061523, + "loss": 1.2273, + "rewards/accuracies": 0.625, + "rewards/chosen": -4.4674201011657715, + "rewards/margins": 0.868012011051178, + "rewards/rejected": -5.335432052612305, + "step": 396 + }, + { + "epoch": 0.8393789230260984, + "grad_norm": 6.596648216247559, + "learning_rate": 7.523596207261907e-08, + "logits/chosen": -0.5432775616645813, + "logits/rejected": -0.4928567409515381, + "logps/chosen": -2.1113924980163574, + "logps/rejected": -2.482846975326538, + "loss": 1.319, + "rewards/accuracies": 0.625, + "rewards/chosen": -4.222784996032715, + "rewards/margins": 0.7429092526435852, + "rewards/rejected": -4.965693950653076, + "step": 397 + }, + { + "epoch": 0.8414932276181037, + "grad_norm": 3.9646811485290527, + "learning_rate": 7.329324510360269e-08, + "logits/chosen": -0.5816119909286499, + "logits/rejected": -0.564030110836029, + "logps/chosen": -2.0296411514282227, + "logps/rejected": -2.5152456760406494, + "loss": 1.1645, + "rewards/accuracies": 0.703125, + "rewards/chosen": -4.059282302856445, + "rewards/margins": 0.9712092876434326, + "rewards/rejected": -5.030491352081299, + "step": 398 + }, + { + "epoch": 0.843607532210109, + "grad_norm": 2.7787463665008545, + "learning_rate": 7.137395399565249e-08, + "logits/chosen": -0.6342184543609619, + "logits/rejected": -0.6318203210830688, + "logps/chosen": -2.0209803581237793, + "logps/rejected": -2.5250658988952637, + "loss": 1.1822, + "rewards/accuracies": 0.671875, + "rewards/chosen": -4.041960716247559, + "rewards/margins": 1.0081708431243896, + "rewards/rejected": -5.050131797790527, + "step": 399 + }, + { + "epoch": 0.8457218368021143, + "grad_norm": 4.476524353027344, + "learning_rate": 6.947819411632222e-08, + "logits/chosen": -0.5809480547904968, + "logits/rejected": -0.5740150213241577, + "logps/chosen": -1.9072691202163696, + "logps/rejected": -2.2942898273468018, + "loss": 1.3214, + "rewards/accuracies": 0.6640625, + "rewards/chosen": -3.8145382404327393, + "rewards/margins": 0.7740417718887329, + "rewards/rejected": -4.5885796546936035, + "step": 400 + }, + { + "epoch": 0.8478361413941196, + "grad_norm": 2.47866153717041, + "learning_rate": 6.760606954131965e-08, + "logits/chosen": -0.5540263652801514, + "logits/rejected": -0.5378059148788452, + "logps/chosen": -1.8337305784225464, + "logps/rejected": -2.264974594116211, + "loss": 1.2396, + "rewards/accuracies": 0.671875, + "rewards/chosen": -3.6674611568450928, + "rewards/margins": 0.8624882698059082, + "rewards/rejected": -4.529949188232422, + "step": 401 + }, + { + "epoch": 0.8499504459861249, + "grad_norm": 2.800645112991333, + "learning_rate": 6.575768304879292e-08, + "logits/chosen": -0.6384072303771973, + "logits/rejected": -0.6310533285140991, + "logps/chosen": -1.9723026752471924, + "logps/rejected": -2.3342039585113525, + "loss": 1.2746, + "rewards/accuracies": 0.6171875, + "rewards/chosen": -3.9446053504943848, + "rewards/margins": 0.7238021492958069, + "rewards/rejected": -4.668407917022705, + "step": 402 + }, + { + "epoch": 0.8520647505781301, + "grad_norm": 2.794485092163086, + "learning_rate": 6.3933136113689e-08, + "logits/chosen": -0.7269207239151001, + "logits/rejected": -0.7003817558288574, + "logps/chosen": -1.8535553216934204, + "logps/rejected": -2.2630820274353027, + "loss": 1.1774, + "rewards/accuracies": 0.765625, + "rewards/chosen": -3.707110643386841, + "rewards/margins": 0.8190534114837646, + "rewards/rejected": -4.5261640548706055, + "step": 403 + }, + { + "epoch": 0.8541790551701355, + "grad_norm": 12.197257041931152, + "learning_rate": 6.213252890218162e-08, + "logits/chosen": -0.5296715497970581, + "logits/rejected": -0.5422269105911255, + "logps/chosen": -1.8217012882232666, + "logps/rejected": -2.3873071670532227, + "loss": 1.1467, + "rewards/accuracies": 0.6328125, + "rewards/chosen": -3.643402576446533, + "rewards/margins": 1.1312119960784912, + "rewards/rejected": -4.774614334106445, + "step": 404 + }, + { + "epoch": 0.8562933597621407, + "grad_norm": 2.396972179412842, + "learning_rate": 6.03559602661729e-08, + "logits/chosen": -0.6527739763259888, + "logits/rejected": -0.645389199256897, + "logps/chosen": -1.9720454216003418, + "logps/rejected": -2.2900233268737793, + "loss": 1.3423, + "rewards/accuracies": 0.671875, + "rewards/chosen": -3.9440908432006836, + "rewards/margins": 0.6359554529190063, + "rewards/rejected": -4.580046653747559, + "step": 405 + }, + { + "epoch": 0.8584076643541461, + "grad_norm": 3.5759809017181396, + "learning_rate": 5.8603527737866307e-08, + "logits/chosen": -0.5955278277397156, + "logits/rejected": -0.583007276058197, + "logps/chosen": -1.835761547088623, + "logps/rejected": -2.2889809608459473, + "loss": 1.1015, + "rewards/accuracies": 0.6484375, + "rewards/chosen": -3.671523094177246, + "rewards/margins": 0.9064393639564514, + "rewards/rejected": -4.5779619216918945, + "step": 406 + }, + { + "epoch": 0.8605219689461513, + "grad_norm": 8.514383316040039, + "learning_rate": 5.687532752441232e-08, + "logits/chosen": -0.6325979828834534, + "logits/rejected": -0.5895124077796936, + "logps/chosen": -2.0668628215789795, + "logps/rejected": -2.4919605255126953, + "loss": 1.2469, + "rewards/accuracies": 0.640625, + "rewards/chosen": -4.133725643157959, + "rewards/margins": 0.8501947522163391, + "rewards/rejected": -4.983921051025391, + "step": 407 + }, + { + "epoch": 0.8626362735381566, + "grad_norm": 2.7234861850738525, + "learning_rate": 5.517145450262639e-08, + "logits/chosen": -0.5355826616287231, + "logits/rejected": -0.5421631932258606, + "logps/chosen": -1.8649351596832275, + "logps/rejected": -2.5664312839508057, + "loss": 1.0119, + "rewards/accuracies": 0.7265625, + "rewards/chosen": -3.729870319366455, + "rewards/margins": 1.4029927253723145, + "rewards/rejected": -5.132862567901611, + "step": 408 + }, + { + "epoch": 0.8647505781301619, + "grad_norm": 3.1693661212921143, + "learning_rate": 5.3492002213780754e-08, + "logits/chosen": -0.5687247514724731, + "logits/rejected": -0.5579267740249634, + "logps/chosen": -2.0369410514831543, + "logps/rejected": -2.4640278816223145, + "loss": 1.311, + "rewards/accuracies": 0.640625, + "rewards/chosen": -4.073882102966309, + "rewards/margins": 0.8541740775108337, + "rewards/rejected": -4.928055763244629, + "step": 409 + }, + { + "epoch": 0.8668648827221671, + "grad_norm": 1.8922606706619263, + "learning_rate": 5.183706285846873e-08, + "logits/chosen": -0.6247987151145935, + "logits/rejected": -0.6043509244918823, + "logps/chosen": -1.8121845722198486, + "logps/rejected": -2.2492425441741943, + "loss": 1.1291, + "rewards/accuracies": 0.671875, + "rewards/chosen": -3.6243691444396973, + "rewards/margins": 0.8741158843040466, + "rewards/rejected": -4.498485088348389, + "step": 410 + }, + { + "epoch": 0.8689791873141725, + "grad_norm": 5.305470943450928, + "learning_rate": 5.020672729154307e-08, + "logits/chosen": -0.5554785132408142, + "logits/rejected": -0.565819501876831, + "logps/chosen": -1.9100950956344604, + "logps/rejected": -2.4060237407684326, + "loss": 1.1576, + "rewards/accuracies": 0.640625, + "rewards/chosen": -3.820190191268921, + "rewards/margins": 0.9918570518493652, + "rewards/rejected": -4.812047481536865, + "step": 411 + }, + { + "epoch": 0.8710934919061777, + "grad_norm": 3.2367563247680664, + "learning_rate": 4.860108501712823e-08, + "logits/chosen": -0.6536320447921753, + "logits/rejected": -0.6901589035987854, + "logps/chosen": -1.9213619232177734, + "logps/rejected": -2.270475387573242, + "loss": 1.2711, + "rewards/accuracies": 0.703125, + "rewards/chosen": -3.842723846435547, + "rewards/margins": 0.6982269287109375, + "rewards/rejected": -4.540950775146484, + "step": 412 + }, + { + "epoch": 0.873207796498183, + "grad_norm": 4.2919135093688965, + "learning_rate": 4.7020224183706715e-08, + "logits/chosen": -0.7220910787582397, + "logits/rejected": -0.7015137672424316, + "logps/chosen": -1.7745577096939087, + "logps/rejected": -2.2485008239746094, + "loss": 1.0773, + "rewards/accuracies": 0.7421875, + "rewards/chosen": -3.5491154193878174, + "rewards/margins": 0.9478861093521118, + "rewards/rejected": -4.497001647949219, + "step": 413 + }, + { + "epoch": 0.8753221010901883, + "grad_norm": 6.373754501342773, + "learning_rate": 4.54642315792792e-08, + "logits/chosen": -0.6177189946174622, + "logits/rejected": -0.6176800727844238, + "logps/chosen": -1.8406522274017334, + "logps/rejected": -2.368619441986084, + "loss": 1.1582, + "rewards/accuracies": 0.6640625, + "rewards/chosen": -3.681304454803467, + "rewards/margins": 1.0559337139129639, + "rewards/rejected": -4.737238883972168, + "step": 414 + }, + { + "epoch": 0.8774364056821936, + "grad_norm": 4.120994567871094, + "learning_rate": 4.3933192626600725e-08, + "logits/chosen": -0.5981518626213074, + "logits/rejected": -0.5846447348594666, + "logps/chosen": -1.9437062740325928, + "logps/rejected": -2.4175901412963867, + "loss": 1.1865, + "rewards/accuracies": 0.6875, + "rewards/chosen": -3.8874125480651855, + "rewards/margins": 0.9477680921554565, + "rewards/rejected": -4.835180282592773, + "step": 415 + }, + { + "epoch": 0.8795507102741988, + "grad_norm": 3.618441104888916, + "learning_rate": 4.242719137849077e-08, + "logits/chosen": -0.544365644454956, + "logits/rejected": -0.5385901927947998, + "logps/chosen": -1.8662028312683105, + "logps/rejected": -2.2550435066223145, + "loss": 1.2125, + "rewards/accuracies": 0.625, + "rewards/chosen": -3.732405662536621, + "rewards/margins": 0.7776816487312317, + "rewards/rejected": -4.510087013244629, + "step": 416 + }, + { + "epoch": 0.8816650148662042, + "grad_norm": 8.518675804138184, + "learning_rate": 4.0946310513218726e-08, + "logits/chosen": -0.6048115491867065, + "logits/rejected": -0.5681714415550232, + "logps/chosen": -2.020745038986206, + "logps/rejected": -2.5642106533050537, + "loss": 1.1682, + "rewards/accuracies": 0.7109375, + "rewards/chosen": -4.041490077972412, + "rewards/margins": 1.0869308710098267, + "rewards/rejected": -5.128421306610107, + "step": 417 + }, + { + "epoch": 0.8837793194582094, + "grad_norm": 4.693824768066406, + "learning_rate": 3.9490631329964554e-08, + "logits/chosen": -0.5653468370437622, + "logits/rejected": -0.5610933303833008, + "logps/chosen": -1.8477216958999634, + "logps/rejected": -2.280613660812378, + "loss": 1.2177, + "rewards/accuracies": 0.640625, + "rewards/chosen": -3.6954433917999268, + "rewards/margins": 0.8657836318016052, + "rewards/rejected": -4.561227321624756, + "step": 418 + }, + { + "epoch": 0.8858936240502148, + "grad_norm": 4.910251617431641, + "learning_rate": 3.806023374435663e-08, + "logits/chosen": -0.6456243991851807, + "logits/rejected": -0.6571968197822571, + "logps/chosen": -1.8414027690887451, + "logps/rejected": -2.2380261421203613, + "loss": 1.2081, + "rewards/accuracies": 0.6328125, + "rewards/chosen": -3.6828055381774902, + "rewards/margins": 0.7932465076446533, + "rewards/rejected": -4.476052284240723, + "step": 419 + }, + { + "epoch": 0.88800792864222, + "grad_norm": 2.260300636291504, + "learning_rate": 3.665519628408331e-08, + "logits/chosen": -0.6023683547973633, + "logits/rejected": -0.6400430798530579, + "logps/chosen": -2.039283275604248, + "logps/rejected": -2.520536184310913, + "loss": 1.1629, + "rewards/accuracies": 0.671875, + "rewards/chosen": -4.078566551208496, + "rewards/margins": 0.962505042552948, + "rewards/rejected": -5.041072368621826, + "step": 420 + }, + { + "epoch": 0.8901222332342253, + "grad_norm": 2.411315679550171, + "learning_rate": 3.527559608458225e-08, + "logits/chosen": -0.6408150792121887, + "logits/rejected": -0.6065229177474976, + "logps/chosen": -1.91830313205719, + "logps/rejected": -2.378871440887451, + "loss": 1.1848, + "rewards/accuracies": 0.6796875, + "rewards/chosen": -3.83660626411438, + "rewards/margins": 0.9211370944976807, + "rewards/rejected": -4.757742881774902, + "step": 421 + }, + { + "epoch": 0.8922365378262306, + "grad_norm": 8.43724250793457, + "learning_rate": 3.39215088848061e-08, + "logits/chosen": -0.5962439179420471, + "logits/rejected": -0.5975909233093262, + "logps/chosen": -1.9837861061096191, + "logps/rejected": -2.319769859313965, + "loss": 1.2026, + "rewards/accuracies": 0.6953125, + "rewards/chosen": -3.9675722122192383, + "rewards/margins": 0.6719677448272705, + "rewards/rejected": -4.63953971862793, + "step": 422 + }, + { + "epoch": 0.8943508424182359, + "grad_norm": 2.261178731918335, + "learning_rate": 3.259300902306367e-08, + "logits/chosen": -0.6858331561088562, + "logits/rejected": -0.7034648060798645, + "logps/chosen": -1.8496602773666382, + "logps/rejected": -2.3583877086639404, + "loss": 1.1137, + "rewards/accuracies": 0.734375, + "rewards/chosen": -3.6993205547332764, + "rewards/margins": 1.0174546241760254, + "rewards/rejected": -4.716775417327881, + "step": 423 + }, + { + "epoch": 0.8964651470102412, + "grad_norm": 7.621473789215088, + "learning_rate": 3.129016943293955e-08, + "logits/chosen": -0.6037753224372864, + "logits/rejected": -0.5865834355354309, + "logps/chosen": -1.902024507522583, + "logps/rejected": -2.3152518272399902, + "loss": 1.2577, + "rewards/accuracies": 0.6328125, + "rewards/chosen": -3.804049015045166, + "rewards/margins": 0.8264546394348145, + "rewards/rejected": -4.6305036544799805, + "step": 424 + }, + { + "epoch": 0.8985794516022465, + "grad_norm": 2.954953908920288, + "learning_rate": 3.001306163928985e-08, + "logits/chosen": -0.6682695746421814, + "logits/rejected": -0.6516857147216797, + "logps/chosen": -2.0923025608062744, + "logps/rejected": -2.4602210521698, + "loss": 1.3758, + "rewards/accuracies": 0.5859375, + "rewards/chosen": -4.184605121612549, + "rewards/margins": 0.7358372211456299, + "rewards/rejected": -4.9204421043396, + "step": 425 + }, + { + "epoch": 0.9006937561942517, + "grad_norm": 4.746059894561768, + "learning_rate": 2.8761755754315663e-08, + "logits/chosen": -0.6213058829307556, + "logits/rejected": -0.6071665287017822, + "logps/chosen": -1.9309402704238892, + "logps/rejected": -2.3048858642578125, + "loss": 1.2216, + "rewards/accuracies": 0.6328125, + "rewards/chosen": -3.8618805408477783, + "rewards/margins": 0.7478916049003601, + "rewards/rejected": -4.609771728515625, + "step": 426 + }, + { + "epoch": 0.902808060786257, + "grad_norm": 3.4567902088165283, + "learning_rate": 2.753632047371335e-08, + "logits/chosen": -0.5602300763130188, + "logits/rejected": -0.5994393825531006, + "logps/chosen": -2.0382192134857178, + "logps/rejected": -2.4620015621185303, + "loss": 1.1534, + "rewards/accuracies": 0.6875, + "rewards/chosen": -4.0764384269714355, + "rewards/margins": 0.8475649952888489, + "rewards/rejected": -4.9240031242370605, + "step": 427 + }, + { + "epoch": 0.9049223653782623, + "grad_norm": 8.650147438049316, + "learning_rate": 2.63368230729043e-08, + "logits/chosen": -0.6574521660804749, + "logits/rejected": -0.6474560499191284, + "logps/chosen": -2.01283860206604, + "logps/rejected": -2.3451762199401855, + "loss": 1.3337, + "rewards/accuracies": 0.625, + "rewards/chosen": -4.02567720413208, + "rewards/margins": 0.6646751165390015, + "rewards/rejected": -4.690352439880371, + "step": 428 + }, + { + "epoch": 0.9070366699702676, + "grad_norm": 4.965768337249756, + "learning_rate": 2.5163329403340593e-08, + "logits/chosen": -0.632398784160614, + "logits/rejected": -0.6226595640182495, + "logps/chosen": -1.9954252243041992, + "logps/rejected": -2.415121555328369, + "loss": 1.1249, + "rewards/accuracies": 0.6484375, + "rewards/chosen": -3.9908504486083984, + "rewards/margins": 0.8393926024436951, + "rewards/rejected": -4.830243110656738, + "step": 429 + }, + { + "epoch": 0.9091509745622729, + "grad_norm": 4.165818214416504, + "learning_rate": 2.4015903888890242e-08, + "logits/chosen": -0.6372086405754089, + "logits/rejected": -0.6573516130447388, + "logps/chosen": -1.9238042831420898, + "logps/rejected": -2.3672964572906494, + "loss": 1.1372, + "rewards/accuracies": 0.65625, + "rewards/chosen": -3.8476085662841797, + "rewards/margins": 0.8869843482971191, + "rewards/rejected": -4.734592914581299, + "step": 430 + }, + { + "epoch": 0.9112652791542781, + "grad_norm": 4.025818347930908, + "learning_rate": 2.289460952230038e-08, + "logits/chosen": -0.6017577648162842, + "logits/rejected": -0.5835919380187988, + "logps/chosen": -1.9263951778411865, + "logps/rejected": -2.364337921142578, + "loss": 1.1519, + "rewards/accuracies": 0.7265625, + "rewards/chosen": -3.852790355682373, + "rewards/margins": 0.875885009765625, + "rewards/rejected": -4.728675842285156, + "step": 431 + }, + { + "epoch": 0.9133795837462835, + "grad_norm": 2.232624053955078, + "learning_rate": 2.1799507861738788e-08, + "logits/chosen": -0.697775661945343, + "logits/rejected": -0.7254015803337097, + "logps/chosen": -1.8258415460586548, + "logps/rejected": -2.089477777481079, + "loss": 1.3136, + "rewards/accuracies": 0.578125, + "rewards/chosen": -3.6516830921173096, + "rewards/margins": 0.5272722244262695, + "rewards/rejected": -4.178955554962158, + "step": 432 + }, + { + "epoch": 0.9154938883382887, + "grad_norm": 5.815128326416016, + "learning_rate": 2.073065902741472e-08, + "logits/chosen": -0.5873744487762451, + "logits/rejected": -0.5638723969459534, + "logps/chosen": -1.9891620874404907, + "logps/rejected": -2.4962096214294434, + "loss": 1.1379, + "rewards/accuracies": 0.6953125, + "rewards/chosen": -3.9783241748809814, + "rewards/margins": 1.0140951871871948, + "rewards/rejected": -4.992419242858887, + "step": 433 + }, + { + "epoch": 0.917608192930294, + "grad_norm": 5.057411193847656, + "learning_rate": 1.9688121698277993e-08, + "logits/chosen": -0.607324481010437, + "logits/rejected": -0.5964059829711914, + "logps/chosen": -1.8643240928649902, + "logps/rejected": -2.2751855850219727, + "loss": 1.2388, + "rewards/accuracies": 0.625, + "rewards/chosen": -3.7286481857299805, + "rewards/margins": 0.8217229843139648, + "rewards/rejected": -4.550371170043945, + "step": 434 + }, + { + "epoch": 0.9197224975222993, + "grad_norm": 2.25390362739563, + "learning_rate": 1.8671953108797823e-08, + "logits/chosen": -0.6268022656440735, + "logits/rejected": -0.6332954168319702, + "logps/chosen": -1.945924997329712, + "logps/rejected": -2.330981731414795, + "loss": 1.1455, + "rewards/accuracies": 0.71875, + "rewards/chosen": -3.891849994659424, + "rewards/margins": 0.7701136469841003, + "rewards/rejected": -4.66196346282959, + "step": 435 + }, + { + "epoch": 0.9218368021143046, + "grad_norm": 3.9572856426239014, + "learning_rate": 1.7682209045820684e-08, + "logits/chosen": -0.6522207856178284, + "logits/rejected": -0.6930267810821533, + "logps/chosen": -1.8152984380722046, + "logps/rejected": -2.0800223350524902, + "loss": 1.2978, + "rewards/accuracies": 0.6875, + "rewards/chosen": -3.630596876144409, + "rewards/margins": 0.5294479727745056, + "rewards/rejected": -4.1600446701049805, + "step": 436 + }, + { + "epoch": 0.9239511067063099, + "grad_norm": 1.733438491821289, + "learning_rate": 1.671894384550743e-08, + "logits/chosen": -0.5977643728256226, + "logits/rejected": -0.5842040777206421, + "logps/chosen": -1.8794972896575928, + "logps/rejected": -2.413329601287842, + "loss": 1.0233, + "rewards/accuracies": 0.71875, + "rewards/chosen": -3.7589945793151855, + "rewards/margins": 1.0676649808883667, + "rewards/rejected": -4.826659202575684, + "step": 437 + }, + { + "epoch": 0.9260654112983152, + "grad_norm": 2.8760743141174316, + "learning_rate": 1.5782210390350713e-08, + "logits/chosen": -0.5813508033752441, + "logits/rejected": -0.5602753758430481, + "logps/chosen": -1.7892794609069824, + "logps/rejected": -2.32309627532959, + "loss": 1.0836, + "rewards/accuracies": 0.71875, + "rewards/chosen": -3.578558921813965, + "rewards/margins": 1.0676335096359253, + "rewards/rejected": -4.64619255065918, + "step": 438 + }, + { + "epoch": 0.9281797158903204, + "grad_norm": 5.760490894317627, + "learning_rate": 1.4872060106271179e-08, + "logits/chosen": -0.5673117637634277, + "logits/rejected": -0.5580011606216431, + "logps/chosen": -1.943117618560791, + "logps/rejected": -2.4581894874572754, + "loss": 1.1229, + "rewards/accuracies": 0.71875, + "rewards/chosen": -3.886235237121582, + "rewards/margins": 1.030144453048706, + "rewards/rejected": -4.916378974914551, + "step": 439 + }, + { + "epoch": 0.9302940204823258, + "grad_norm": 5.213393211364746, + "learning_rate": 1.3988542959794625e-08, + "logits/chosen": -0.5715171098709106, + "logits/rejected": -0.5791775584220886, + "logps/chosen": -1.961305022239685, + "logps/rejected": -2.4485957622528076, + "loss": 1.0877, + "rewards/accuracies": 0.7265625, + "rewards/chosen": -3.92261004447937, + "rewards/margins": 0.9745810627937317, + "rewards/rejected": -4.897191524505615, + "step": 440 + }, + { + "epoch": 0.932408325074331, + "grad_norm": 2.670029878616333, + "learning_rate": 1.3131707455309004e-08, + "logits/chosen": -0.6612206101417542, + "logits/rejected": -0.569149374961853, + "logps/chosen": -1.9947882890701294, + "logps/rejected": -2.41544771194458, + "loss": 1.2501, + "rewards/accuracies": 0.65625, + "rewards/chosen": -3.989576578140259, + "rewards/margins": 0.8413184881210327, + "rewards/rejected": -4.83089542388916, + "step": 441 + }, + { + "epoch": 0.9345226296663363, + "grad_norm": 2.0773093700408936, + "learning_rate": 1.230160063240121e-08, + "logits/chosen": -0.5475001335144043, + "logits/rejected": -0.6024526953697205, + "logps/chosen": -1.9972546100616455, + "logps/rejected": -2.2212231159210205, + "loss": 1.2857, + "rewards/accuracies": 0.6796875, + "rewards/chosen": -3.994509220123291, + "rewards/margins": 0.4479368031024933, + "rewards/rejected": -4.442446231842041, + "step": 442 + }, + { + "epoch": 0.9366369342583416, + "grad_norm": 2.6185569763183594, + "learning_rate": 1.1498268063274697e-08, + "logits/chosen": -0.6600778102874756, + "logits/rejected": -0.6794160604476929, + "logps/chosen": -1.7303975820541382, + "logps/rejected": -2.0589568614959717, + "loss": 1.183, + "rewards/accuracies": 0.6875, + "rewards/chosen": -3.4607951641082764, + "rewards/margins": 0.6571190357208252, + "rewards/rejected": -4.117913722991943, + "step": 443 + }, + { + "epoch": 0.9387512388503468, + "grad_norm": 2.7385923862457275, + "learning_rate": 1.0721753850247984e-08, + "logits/chosen": -0.6136504411697388, + "logits/rejected": -0.5926402807235718, + "logps/chosen": -1.9593303203582764, + "logps/rejected": -2.446382999420166, + "loss": 1.161, + "rewards/accuracies": 0.640625, + "rewards/chosen": -3.9186606407165527, + "rewards/margins": 0.9741055965423584, + "rewards/rejected": -4.892765998840332, + "step": 444 + }, + { + "epoch": 0.9408655434423522, + "grad_norm": 2.006077527999878, + "learning_rate": 9.972100623333035e-09, + "logits/chosen": -0.5911227464675903, + "logits/rejected": -0.5988056063652039, + "logps/chosen": -1.9767932891845703, + "logps/rejected": -2.307847499847412, + "loss": 1.2698, + "rewards/accuracies": 0.6953125, + "rewards/chosen": -3.9535865783691406, + "rewards/margins": 0.6621084809303284, + "rewards/rejected": -4.615694999694824, + "step": 445 + }, + { + "epoch": 0.9429798480343574, + "grad_norm": 3.775676965713501, + "learning_rate": 9.249349537894968e-09, + "logits/chosen": -0.5951496958732605, + "logits/rejected": -0.5602840185165405, + "logps/chosen": -2.01466965675354, + "logps/rejected": -2.404120922088623, + "loss": 1.3551, + "rewards/accuracies": 0.6328125, + "rewards/chosen": -4.02933931350708, + "rewards/margins": 0.7789022922515869, + "rewards/rejected": -4.808241844177246, + "step": 446 + }, + { + "epoch": 0.9450941526263628, + "grad_norm": 10.657898902893066, + "learning_rate": 8.553540272392967e-09, + "logits/chosen": -0.616013765335083, + "logits/rejected": -0.6068493127822876, + "logps/chosen": -1.9523563385009766, + "logps/rejected": -2.3371798992156982, + "loss": 1.2264, + "rewards/accuracies": 0.640625, + "rewards/chosen": -3.904712677001953, + "rewards/margins": 0.7696471810340881, + "rewards/rejected": -4.6743597984313965, + "step": 447 + }, + { + "epoch": 0.947208457218368, + "grad_norm": 5.239955902099609, + "learning_rate": 7.884711026201584e-09, + "logits/chosen": -0.5559091567993164, + "logits/rejected": -0.5499454140663147, + "logps/chosen": -1.9888339042663574, + "logps/rejected": -2.5645201206207275, + "loss": 1.1615, + "rewards/accuracies": 0.703125, + "rewards/chosen": -3.977667808532715, + "rewards/margins": 1.1513725519180298, + "rewards/rejected": -5.129040241241455, + "step": 448 + } + ], + "logging_steps": 1, + "max_steps": 472, + "num_input_tokens_seen": 0, + "num_train_epochs": 1, + "save_steps": 64, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 0.0, + "train_batch_size": 2, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-472/config.json b/checkpoint-472/config.json new file mode 100644 index 0000000000000000000000000000000000000000..ead276a9e0bbd4a0baae360715d8e80853974eb6 --- /dev/null +++ b/checkpoint-472/config.json @@ -0,0 +1,40 @@ +{ + "_name_or_path": "/home/v-liuzichuan/huggingface/Llama-3.2-3B-Instruct", + "architectures": [ + "LlamaForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "bos_token_id": 128000, + "eos_token_id": [ + 128001, + 128008, + 128009 + ], + "head_dim": 128, + "hidden_act": "silu", + "hidden_size": 3072, + "initializer_range": 0.02, + "intermediate_size": 8192, + "max_position_embeddings": 131072, + "mlp_bias": false, + "model_type": "llama", + "num_attention_heads": 24, + "num_hidden_layers": 28, + "num_key_value_heads": 8, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": { + "factor": 32.0, + "high_freq_factor": 4.0, + "low_freq_factor": 1.0, + "original_max_position_embeddings": 8192, + "rope_type": "llama3" + }, + "rope_theta": 500000.0, + "tie_word_embeddings": true, + "torch_dtype": "bfloat16", + "transformers_version": "4.45.2", + "use_cache": false, + "vocab_size": 128256 +} diff --git a/checkpoint-472/generation_config.json b/checkpoint-472/generation_config.json new file mode 100644 index 0000000000000000000000000000000000000000..b18d16b4cc3c799dd78e854fd824ddf8a2996285 --- /dev/null +++ b/checkpoint-472/generation_config.json @@ -0,0 +1,12 @@ +{ + "bos_token_id": 128000, + "do_sample": true, + "eos_token_id": [ + 128001, + 128008, + 128009 + ], + "temperature": 0.6, + "top_p": 0.9, + "transformers_version": "4.45.2" +} diff --git a/checkpoint-472/model-00001-of-00002.safetensors b/checkpoint-472/model-00001-of-00002.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..338240a8dd22aab493666ae76b5bacad45a258c4 --- /dev/null +++ b/checkpoint-472/model-00001-of-00002.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b545023ec84ebafbf8707baa5dd99d4a8fc503e81f47167d70732510d2ca7922 +size 4965799096 diff --git a/checkpoint-472/model-00002-of-00002.safetensors b/checkpoint-472/model-00002-of-00002.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..33b50cf297e3a969e9a172bcf6adccc38753d31e --- /dev/null +++ b/checkpoint-472/model-00002-of-00002.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:05ed5a1ee186eaa4b7717096d6008f744a478b154c9e969eaaddd323150b36c5 +size 2247734992 diff --git a/checkpoint-472/model.safetensors.index.json b/checkpoint-472/model.safetensors.index.json new file mode 100644 index 0000000000000000000000000000000000000000..ed64de846d720b9a7859dc20575fea8e8ca51940 --- /dev/null +++ b/checkpoint-472/model.safetensors.index.json @@ -0,0 +1,262 @@ +{ + "metadata": { + "total_size": 7213504512 + }, + "weight_map": { + "lm_head.weight": "model-00002-of-00002.safetensors", + "model.embed_tokens.weight": "model-00001-of-00002.safetensors", + "model.layers.0.input_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.0.mlp.down_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.0.mlp.gate_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.0.mlp.up_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.0.post_attention_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.0.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.0.self_attn.o_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.0.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.0.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.1.input_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.1.mlp.down_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.1.mlp.gate_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.1.mlp.up_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.1.post_attention_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.1.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.1.self_attn.o_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.1.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.1.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.10.input_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.10.mlp.down_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.10.mlp.gate_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.10.mlp.up_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.10.post_attention_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.10.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.10.self_attn.o_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.10.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.10.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.11.input_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.11.mlp.down_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.11.mlp.gate_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.11.mlp.up_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.11.post_attention_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.11.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.11.self_attn.o_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.11.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.11.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.12.input_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.12.mlp.down_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.12.mlp.gate_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.12.mlp.up_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.12.post_attention_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.12.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.12.self_attn.o_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.12.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.12.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.13.input_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.13.mlp.down_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.13.mlp.gate_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.13.mlp.up_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.13.post_attention_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.13.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.13.self_attn.o_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.13.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.13.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.14.input_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.14.mlp.down_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.14.mlp.gate_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.14.mlp.up_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.14.post_attention_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.14.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.14.self_attn.o_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.14.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.14.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.15.input_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.15.mlp.down_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.15.mlp.gate_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.15.mlp.up_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.15.post_attention_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.15.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.15.self_attn.o_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.15.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.15.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.16.input_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.16.mlp.down_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.16.mlp.gate_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.16.mlp.up_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.16.post_attention_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.16.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.16.self_attn.o_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.16.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.16.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.17.input_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.17.mlp.down_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.17.mlp.gate_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.17.mlp.up_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.17.post_attention_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.17.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.17.self_attn.o_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.17.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.17.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.18.input_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.18.mlp.down_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.18.mlp.gate_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.18.mlp.up_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.18.post_attention_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.18.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.18.self_attn.o_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.18.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.18.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.19.input_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.19.mlp.down_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.19.mlp.gate_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.19.mlp.up_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.19.post_attention_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.19.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.19.self_attn.o_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.19.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.19.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.2.input_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.2.mlp.down_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.2.mlp.gate_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.2.mlp.up_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.2.post_attention_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.2.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.2.self_attn.o_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.2.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.2.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.20.input_layernorm.weight": "model-00002-of-00002.safetensors", + "model.layers.20.mlp.down_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.20.mlp.gate_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.20.mlp.up_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.20.post_attention_layernorm.weight": "model-00002-of-00002.safetensors", + "model.layers.20.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.20.self_attn.o_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.20.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.20.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.21.input_layernorm.weight": "model-00002-of-00002.safetensors", + "model.layers.21.mlp.down_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.21.mlp.gate_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.21.mlp.up_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.21.post_attention_layernorm.weight": "model-00002-of-00002.safetensors", + "model.layers.21.self_attn.k_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.21.self_attn.o_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.21.self_attn.q_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.21.self_attn.v_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.22.input_layernorm.weight": "model-00002-of-00002.safetensors", + "model.layers.22.mlp.down_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.22.mlp.gate_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.22.mlp.up_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.22.post_attention_layernorm.weight": "model-00002-of-00002.safetensors", + "model.layers.22.self_attn.k_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.22.self_attn.o_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.22.self_attn.q_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.22.self_attn.v_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.23.input_layernorm.weight": "model-00002-of-00002.safetensors", + "model.layers.23.mlp.down_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.23.mlp.gate_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.23.mlp.up_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.23.post_attention_layernorm.weight": "model-00002-of-00002.safetensors", + "model.layers.23.self_attn.k_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.23.self_attn.o_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.23.self_attn.q_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.23.self_attn.v_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.24.input_layernorm.weight": "model-00002-of-00002.safetensors", + "model.layers.24.mlp.down_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.24.mlp.gate_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.24.mlp.up_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.24.post_attention_layernorm.weight": "model-00002-of-00002.safetensors", + "model.layers.24.self_attn.k_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.24.self_attn.o_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.24.self_attn.q_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.24.self_attn.v_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.25.input_layernorm.weight": "model-00002-of-00002.safetensors", + "model.layers.25.mlp.down_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.25.mlp.gate_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.25.mlp.up_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.25.post_attention_layernorm.weight": "model-00002-of-00002.safetensors", + "model.layers.25.self_attn.k_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.25.self_attn.o_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.25.self_attn.q_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.25.self_attn.v_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.26.input_layernorm.weight": "model-00002-of-00002.safetensors", + "model.layers.26.mlp.down_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.26.mlp.gate_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.26.mlp.up_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.26.post_attention_layernorm.weight": "model-00002-of-00002.safetensors", + "model.layers.26.self_attn.k_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.26.self_attn.o_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.26.self_attn.q_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.26.self_attn.v_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.27.input_layernorm.weight": "model-00002-of-00002.safetensors", + "model.layers.27.mlp.down_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.27.mlp.gate_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.27.mlp.up_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.27.post_attention_layernorm.weight": "model-00002-of-00002.safetensors", + "model.layers.27.self_attn.k_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.27.self_attn.o_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.27.self_attn.q_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.27.self_attn.v_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.3.input_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.3.mlp.down_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.3.mlp.gate_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.3.mlp.up_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.3.post_attention_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.3.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.3.self_attn.o_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.3.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.3.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.4.input_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.4.mlp.down_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.4.mlp.gate_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.4.mlp.up_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.4.post_attention_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.4.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.4.self_attn.o_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.4.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.4.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.5.input_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.5.mlp.down_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.5.mlp.gate_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.5.mlp.up_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.5.post_attention_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.5.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.5.self_attn.o_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.5.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.5.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.6.input_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.6.mlp.down_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.6.mlp.gate_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.6.mlp.up_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.6.post_attention_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.6.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.6.self_attn.o_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.6.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.6.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.7.input_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.7.mlp.down_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.7.mlp.gate_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.7.mlp.up_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.7.post_attention_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.7.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.7.self_attn.o_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.7.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.7.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.8.input_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.8.mlp.down_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.8.mlp.gate_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.8.mlp.up_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.8.post_attention_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.8.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.8.self_attn.o_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.8.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.8.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.9.input_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.9.mlp.down_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.9.mlp.gate_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.9.mlp.up_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.9.post_attention_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.9.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.9.self_attn.o_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.9.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.9.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.norm.weight": "model-00002-of-00002.safetensors" + } +} diff --git a/checkpoint-472/special_tokens_map.json b/checkpoint-472/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..b43be96621d147110fb8a18b5776ec6e38516127 --- /dev/null +++ b/checkpoint-472/special_tokens_map.json @@ -0,0 +1,17 @@ +{ + "bos_token": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": "<|eot_id|>" +} diff --git a/checkpoint-472/tokenizer.json b/checkpoint-472/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..1c1d8d5c9024994f1d3b00f9662b8dd89ca13cf2 --- /dev/null +++ b/checkpoint-472/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6b9e4e7fb171f92fd137b777cc2714bf87d11576700a1dcd7a399e7bbe39537b +size 17209920 diff --git a/checkpoint-472/tokenizer_config.json b/checkpoint-472/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..e3cdb7bfcc59fd293867d98e124a30b3ddf39c93 --- /dev/null +++ b/checkpoint-472/tokenizer_config.json @@ -0,0 +1,2063 @@ +{ + "added_tokens_decoder": { + "128000": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128001": { + "content": "<|end_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128002": { + "content": "<|reserved_special_token_0|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128003": { + "content": "<|reserved_special_token_1|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128004": { + "content": "<|finetune_right_pad_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128005": { + "content": "<|reserved_special_token_2|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128006": { + "content": "<|start_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128007": { + "content": "<|end_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128008": { + "content": "<|eom_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128009": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128010": { + "content": "<|python_tag|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128011": { + "content": "<|reserved_special_token_3|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128012": { + "content": "<|reserved_special_token_4|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128013": { + "content": "<|reserved_special_token_5|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128014": { + "content": "<|reserved_special_token_6|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128015": { + "content": "<|reserved_special_token_7|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128016": { + "content": "<|reserved_special_token_8|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128017": { + "content": "<|reserved_special_token_9|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128018": { + "content": "<|reserved_special_token_10|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128019": { + "content": "<|reserved_special_token_11|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128020": { + "content": "<|reserved_special_token_12|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128021": { + "content": "<|reserved_special_token_13|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128022": { + "content": "<|reserved_special_token_14|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128023": { + "content": "<|reserved_special_token_15|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128024": { + "content": "<|reserved_special_token_16|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128025": { + "content": "<|reserved_special_token_17|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128026": { + "content": "<|reserved_special_token_18|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128027": { + "content": "<|reserved_special_token_19|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128028": { + "content": "<|reserved_special_token_20|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128029": { + "content": "<|reserved_special_token_21|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128030": { + "content": "<|reserved_special_token_22|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128031": { + "content": "<|reserved_special_token_23|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128032": { + "content": "<|reserved_special_token_24|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128033": { + "content": "<|reserved_special_token_25|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128034": { + "content": "<|reserved_special_token_26|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128035": { + "content": "<|reserved_special_token_27|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128036": { + "content": "<|reserved_special_token_28|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128037": { + "content": "<|reserved_special_token_29|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128038": { + "content": "<|reserved_special_token_30|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128039": { + "content": "<|reserved_special_token_31|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128040": { + "content": "<|reserved_special_token_32|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128041": { + "content": "<|reserved_special_token_33|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128042": { + "content": "<|reserved_special_token_34|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128043": { + "content": "<|reserved_special_token_35|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128044": { + "content": "<|reserved_special_token_36|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128045": { + "content": "<|reserved_special_token_37|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128046": { + "content": "<|reserved_special_token_38|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128047": { + "content": "<|reserved_special_token_39|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128048": { + "content": "<|reserved_special_token_40|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128049": { + "content": "<|reserved_special_token_41|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128050": { + "content": "<|reserved_special_token_42|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128051": { + "content": "<|reserved_special_token_43|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128052": { + "content": "<|reserved_special_token_44|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128053": { + "content": "<|reserved_special_token_45|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128054": { + "content": "<|reserved_special_token_46|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128055": { + "content": "<|reserved_special_token_47|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128056": { + "content": "<|reserved_special_token_48|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128057": { + "content": "<|reserved_special_token_49|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128058": { + "content": "<|reserved_special_token_50|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128059": { + "content": "<|reserved_special_token_51|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128060": { + "content": "<|reserved_special_token_52|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128061": { + "content": "<|reserved_special_token_53|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128062": { + "content": "<|reserved_special_token_54|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128063": { + "content": "<|reserved_special_token_55|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128064": { + "content": "<|reserved_special_token_56|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128065": { + "content": "<|reserved_special_token_57|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128066": { + "content": "<|reserved_special_token_58|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128067": { + "content": "<|reserved_special_token_59|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128068": { + "content": "<|reserved_special_token_60|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128069": { + "content": "<|reserved_special_token_61|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128070": { + "content": "<|reserved_special_token_62|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128071": { + "content": "<|reserved_special_token_63|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128072": { + "content": "<|reserved_special_token_64|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128073": { + "content": "<|reserved_special_token_65|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128074": { + "content": "<|reserved_special_token_66|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128075": { + "content": "<|reserved_special_token_67|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128076": { + "content": "<|reserved_special_token_68|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128077": { + "content": "<|reserved_special_token_69|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128078": { + "content": "<|reserved_special_token_70|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128079": { + "content": "<|reserved_special_token_71|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128080": { + "content": "<|reserved_special_token_72|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128081": { + "content": "<|reserved_special_token_73|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128082": { + "content": "<|reserved_special_token_74|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128083": { + "content": "<|reserved_special_token_75|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128084": { + "content": "<|reserved_special_token_76|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128085": { + "content": "<|reserved_special_token_77|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128086": { + "content": "<|reserved_special_token_78|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128087": { + "content": "<|reserved_special_token_79|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128088": { + "content": "<|reserved_special_token_80|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128089": { + "content": "<|reserved_special_token_81|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128090": { + "content": "<|reserved_special_token_82|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128091": { + "content": "<|reserved_special_token_83|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128092": { + "content": "<|reserved_special_token_84|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128093": { + "content": "<|reserved_special_token_85|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128094": { + "content": "<|reserved_special_token_86|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128095": { + "content": "<|reserved_special_token_87|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128096": { + "content": "<|reserved_special_token_88|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128097": { + "content": "<|reserved_special_token_89|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128098": { + "content": "<|reserved_special_token_90|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128099": { + "content": "<|reserved_special_token_91|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128100": { + "content": "<|reserved_special_token_92|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128101": { + "content": "<|reserved_special_token_93|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128102": { + "content": "<|reserved_special_token_94|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128103": { + "content": "<|reserved_special_token_95|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128104": { + "content": "<|reserved_special_token_96|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128105": { + "content": "<|reserved_special_token_97|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128106": { + "content": "<|reserved_special_token_98|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128107": { + "content": "<|reserved_special_token_99|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128108": { + "content": "<|reserved_special_token_100|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128109": { + "content": "<|reserved_special_token_101|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128110": { + "content": "<|reserved_special_token_102|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128111": { + "content": "<|reserved_special_token_103|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128112": { + "content": "<|reserved_special_token_104|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128113": { + "content": "<|reserved_special_token_105|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128114": { + "content": "<|reserved_special_token_106|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128115": { + "content": "<|reserved_special_token_107|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128116": { + "content": "<|reserved_special_token_108|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128117": { + "content": "<|reserved_special_token_109|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128118": { + "content": "<|reserved_special_token_110|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128119": { + "content": "<|reserved_special_token_111|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128120": { + "content": "<|reserved_special_token_112|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128121": { + "content": "<|reserved_special_token_113|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128122": { + "content": "<|reserved_special_token_114|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128123": { + "content": "<|reserved_special_token_115|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128124": { + "content": "<|reserved_special_token_116|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128125": { + "content": "<|reserved_special_token_117|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128126": { + "content": "<|reserved_special_token_118|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128127": { + "content": "<|reserved_special_token_119|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128128": { + "content": "<|reserved_special_token_120|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128129": { + "content": "<|reserved_special_token_121|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128130": { + "content": "<|reserved_special_token_122|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128131": { + "content": "<|reserved_special_token_123|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128132": { + "content": "<|reserved_special_token_124|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128133": { + "content": "<|reserved_special_token_125|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128134": { + "content": "<|reserved_special_token_126|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128135": { + "content": "<|reserved_special_token_127|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128136": { + "content": "<|reserved_special_token_128|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128137": { + "content": "<|reserved_special_token_129|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128138": { + "content": "<|reserved_special_token_130|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128139": { + "content": "<|reserved_special_token_131|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128140": { + "content": "<|reserved_special_token_132|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128141": { + "content": "<|reserved_special_token_133|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128142": { + "content": "<|reserved_special_token_134|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128143": { + "content": "<|reserved_special_token_135|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128144": { + "content": "<|reserved_special_token_136|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128145": { + "content": "<|reserved_special_token_137|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128146": { + "content": "<|reserved_special_token_138|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128147": { + "content": "<|reserved_special_token_139|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128148": { + "content": "<|reserved_special_token_140|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128149": { + "content": "<|reserved_special_token_141|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128150": { + "content": "<|reserved_special_token_142|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128151": { + "content": "<|reserved_special_token_143|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128152": { + "content": "<|reserved_special_token_144|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128153": { + "content": "<|reserved_special_token_145|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128154": { + "content": "<|reserved_special_token_146|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128155": { + "content": "<|reserved_special_token_147|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128156": { + "content": "<|reserved_special_token_148|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128157": { + "content": "<|reserved_special_token_149|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128158": { + "content": "<|reserved_special_token_150|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128159": { + "content": "<|reserved_special_token_151|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128160": { + "content": "<|reserved_special_token_152|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128161": { + "content": "<|reserved_special_token_153|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128162": { + "content": "<|reserved_special_token_154|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128163": { + "content": "<|reserved_special_token_155|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128164": { + "content": "<|reserved_special_token_156|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128165": { + "content": "<|reserved_special_token_157|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128166": { + "content": "<|reserved_special_token_158|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128167": { + "content": "<|reserved_special_token_159|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128168": { + "content": "<|reserved_special_token_160|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128169": { + "content": "<|reserved_special_token_161|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128170": { + "content": "<|reserved_special_token_162|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128171": { + "content": "<|reserved_special_token_163|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128172": { + "content": "<|reserved_special_token_164|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128173": { + "content": "<|reserved_special_token_165|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128174": { + "content": "<|reserved_special_token_166|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128175": { + "content": "<|reserved_special_token_167|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128176": { + "content": "<|reserved_special_token_168|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128177": { + "content": "<|reserved_special_token_169|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128178": { + "content": "<|reserved_special_token_170|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128179": { + "content": "<|reserved_special_token_171|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128180": { + "content": "<|reserved_special_token_172|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128181": { + "content": "<|reserved_special_token_173|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128182": { + "content": "<|reserved_special_token_174|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128183": { + "content": "<|reserved_special_token_175|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128184": { + "content": "<|reserved_special_token_176|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128185": { + "content": "<|reserved_special_token_177|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128186": { + "content": "<|reserved_special_token_178|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128187": { + "content": "<|reserved_special_token_179|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128188": { + "content": "<|reserved_special_token_180|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128189": { + "content": "<|reserved_special_token_181|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128190": { + "content": "<|reserved_special_token_182|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128191": { + "content": "<|reserved_special_token_183|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128192": { + "content": "<|reserved_special_token_184|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128193": { + "content": "<|reserved_special_token_185|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128194": { + "content": "<|reserved_special_token_186|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128195": { + "content": "<|reserved_special_token_187|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128196": { + "content": "<|reserved_special_token_188|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128197": { + "content": "<|reserved_special_token_189|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128198": { + "content": "<|reserved_special_token_190|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128199": { + "content": "<|reserved_special_token_191|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128200": { + "content": "<|reserved_special_token_192|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128201": { + "content": "<|reserved_special_token_193|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128202": { + "content": "<|reserved_special_token_194|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128203": { + "content": "<|reserved_special_token_195|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128204": { + "content": "<|reserved_special_token_196|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128205": { + "content": "<|reserved_special_token_197|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128206": { + "content": "<|reserved_special_token_198|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128207": { + "content": "<|reserved_special_token_199|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128208": { + "content": "<|reserved_special_token_200|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128209": { + "content": "<|reserved_special_token_201|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128210": { + "content": "<|reserved_special_token_202|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128211": { + "content": "<|reserved_special_token_203|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128212": { + "content": "<|reserved_special_token_204|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128213": { + "content": "<|reserved_special_token_205|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128214": { + "content": "<|reserved_special_token_206|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128215": { + "content": "<|reserved_special_token_207|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128216": { + "content": "<|reserved_special_token_208|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128217": { + "content": "<|reserved_special_token_209|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128218": { + "content": "<|reserved_special_token_210|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128219": { + "content": "<|reserved_special_token_211|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128220": { + "content": "<|reserved_special_token_212|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128221": { + "content": "<|reserved_special_token_213|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128222": { + "content": "<|reserved_special_token_214|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128223": { + "content": "<|reserved_special_token_215|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128224": { + "content": "<|reserved_special_token_216|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128225": { + "content": "<|reserved_special_token_217|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128226": { + "content": "<|reserved_special_token_218|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128227": { + "content": "<|reserved_special_token_219|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128228": { + "content": "<|reserved_special_token_220|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128229": { + "content": "<|reserved_special_token_221|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128230": { + "content": "<|reserved_special_token_222|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128231": { + "content": "<|reserved_special_token_223|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128232": { + "content": "<|reserved_special_token_224|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128233": { + "content": "<|reserved_special_token_225|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128234": { + "content": "<|reserved_special_token_226|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128235": { + "content": "<|reserved_special_token_227|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128236": { + "content": "<|reserved_special_token_228|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128237": { + "content": "<|reserved_special_token_229|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128238": { + "content": "<|reserved_special_token_230|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128239": { + "content": "<|reserved_special_token_231|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128240": { + "content": "<|reserved_special_token_232|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128241": { + "content": "<|reserved_special_token_233|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128242": { + "content": "<|reserved_special_token_234|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128243": { + "content": "<|reserved_special_token_235|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128244": { + "content": "<|reserved_special_token_236|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128245": { + "content": "<|reserved_special_token_237|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128246": { + "content": "<|reserved_special_token_238|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128247": { + "content": "<|reserved_special_token_239|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128248": { + "content": "<|reserved_special_token_240|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128249": { + "content": "<|reserved_special_token_241|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128250": { + "content": "<|reserved_special_token_242|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128251": { + "content": "<|reserved_special_token_243|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128252": { + "content": "<|reserved_special_token_244|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128253": { + "content": "<|reserved_special_token_245|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128254": { + "content": "<|reserved_special_token_246|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128255": { + "content": "<|reserved_special_token_247|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + } + }, + "bos_token": "<|begin_of_text|>", + "chat_template": "{{- bos_token }}\n{%- if custom_tools is defined %}\n {%- set tools = custom_tools %}\n{%- endif %}\n{%- if not tools_in_user_message is defined %}\n {%- set tools_in_user_message = true %}\n{%- endif %}\n{%- if not date_string is defined %}\n {%- if strftime_now is defined %}\n {%- set date_string = strftime_now(\"%d %b %Y\") %}\n {%- else %}\n {%- set date_string = \"26 Jul 2024\" %}\n {%- endif %}\n{%- endif %}\n{%- if not tools is defined %}\n {%- set tools = none %}\n{%- endif %}\n\n{#- This block extracts the system message, so we can slot it into the right place. #}\n{%- if messages[0]['role'] == 'system' %}\n {%- set system_message = messages[0]['content']|trim %}\n {%- set messages = messages[1:] %}\n{%- else %}\n {%- set system_message = \"\" %}\n{%- endif %}\n\n{#- System message #}\n{{- \"<|start_header_id|>system<|end_header_id|>\\n\\n\" }}\n{%- if tools is not none %}\n {{- \"Environment: ipython\\n\" }}\n{%- endif %}\n{{- \"Cutting Knowledge Date: December 2023\\n\" }}\n{{- \"Today Date: \" + date_string + \"\\n\\n\" }}\n{%- if tools is not none and not tools_in_user_message %}\n {{- \"You have access to the following functions. To call a function, please respond with JSON for a function call.\" }}\n {{- 'Respond in the format {\"name\": function name, \"parameters\": dictionary of argument name and its value}.' }}\n {{- \"Do not use variables.\\n\\n\" }}\n {%- for t in tools %}\n {{- t | tojson(indent=4) }}\n {{- \"\\n\\n\" }}\n {%- endfor %}\n{%- endif %}\n{{- system_message }}\n{{- \"<|eot_id|>\" }}\n\n{#- Custom tools are passed in a user message with some extra guidance #}\n{%- if tools_in_user_message and not tools is none %}\n {#- Extract the first user message so we can plug it in here #}\n {%- if messages | length != 0 %}\n {%- set first_user_message = messages[0]['content']|trim %}\n {%- set messages = messages[1:] %}\n {%- else %}\n {{- raise_exception(\"Cannot put tools in the first user message when there's no first user message!\") }}\n{%- endif %}\n {{- '<|start_header_id|>user<|end_header_id|>\\n\\n' -}}\n {{- \"Given the following functions, please respond with a JSON for a function call \" }}\n {{- \"with its proper arguments that best answers the given prompt.\\n\\n\" }}\n {{- 'Respond in the format {\"name\": function name, \"parameters\": dictionary of argument name and its value}.' }}\n {{- \"Do not use variables.\\n\\n\" }}\n {%- for t in tools %}\n {{- t | tojson(indent=4) }}\n {{- \"\\n\\n\" }}\n {%- endfor %}\n {{- first_user_message + \"<|eot_id|>\"}}\n{%- endif %}\n\n{%- for message in messages %}\n {%- if not (message.role == 'ipython' or message.role == 'tool' or 'tool_calls' in message) %}\n {{- '<|start_header_id|>' + message['role'] + '<|end_header_id|>\\n\\n'+ message['content'] | trim + '<|eot_id|>' }}\n {%- elif 'tool_calls' in message %}\n {%- if not message.tool_calls|length == 1 %}\n {{- raise_exception(\"This model only supports single tool-calls at once!\") }}\n {%- endif %}\n {%- set tool_call = message.tool_calls[0].function %}\n {{- '<|start_header_id|>assistant<|end_header_id|>\\n\\n' -}}\n {{- '{\"name\": \"' + tool_call.name + '\", ' }}\n {{- '\"parameters\": ' }}\n {{- tool_call.arguments | tojson }}\n {{- \"}\" }}\n {{- \"<|eot_id|>\" }}\n {%- elif message.role == \"tool\" or message.role == \"ipython\" %}\n {{- \"<|start_header_id|>ipython<|end_header_id|>\\n\\n\" }}\n {%- if message.content is mapping or message.content is iterable %}\n {{- message.content | tojson }}\n {%- else %}\n {{- message.content }}\n {%- endif %}\n {{- \"<|eot_id|>\" }}\n {%- endif %}\n{%- endfor %}\n{%- if add_generation_prompt %}\n {{- '<|start_header_id|>assistant<|end_header_id|>\\n\\n' }}\n{%- endif %}\n", + "clean_up_tokenization_spaces": true, + "eos_token": "<|eot_id|>", + "model_input_names": [ + "input_ids", + "attention_mask" + ], + "model_max_length": 2048, + "pad_token": "<|eot_id|>", + "tokenizer_class": "PreTrainedTokenizerFast" +} diff --git a/checkpoint-472/trainer_state.json b/checkpoint-472/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..31fba906b3b106e65cd465a27f099ecf6736fdf6 --- /dev/null +++ b/checkpoint-472/trainer_state.json @@ -0,0 +1,7113 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 0.9979517674264948, + "eval_steps": 500, + "global_step": 472, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.002114304592005286, + "grad_norm": 1.5018059015274048, + "learning_rate": 2.083333333333333e-08, + "logits/chosen": -0.3466828167438507, + "logits/rejected": -0.30099987983703613, + "logps/chosen": -0.9345186948776245, + "logps/rejected": -0.9117153882980347, + "loss": 1.4889, + "rewards/accuracies": 0.53125, + "rewards/chosen": -1.869037389755249, + "rewards/margins": -0.04560665041208267, + "rewards/rejected": -1.8234307765960693, + "step": 1 + }, + { + "epoch": 0.004228609184010572, + "grad_norm": 0.8093975186347961, + "learning_rate": 4.166666666666666e-08, + "logits/chosen": -0.4310421049594879, + "logits/rejected": -0.39132067561149597, + "logps/chosen": -0.8198825716972351, + "logps/rejected": -0.8644211888313293, + "loss": 1.376, + "rewards/accuracies": 0.515625, + "rewards/chosen": -1.6397651433944702, + "rewards/margins": 0.08907715976238251, + "rewards/rejected": -1.7288423776626587, + "step": 2 + }, + { + "epoch": 0.006342913776015857, + "grad_norm": 0.5377389788627625, + "learning_rate": 6.25e-08, + "logits/chosen": -0.46692028641700745, + "logits/rejected": -0.4649256467819214, + "logps/chosen": -0.9087910652160645, + "logps/rejected": -0.9648240804672241, + "loss": 1.3404, + "rewards/accuracies": 0.5703125, + "rewards/chosen": -1.817582130432129, + "rewards/margins": 0.11206617206335068, + "rewards/rejected": -1.9296481609344482, + "step": 3 + }, + { + "epoch": 0.008457218368021144, + "grad_norm": 0.3221875727176666, + "learning_rate": 8.333333333333333e-08, + "logits/chosen": -0.416828453540802, + "logits/rejected": -0.3584724962711334, + "logps/chosen": -0.7818898558616638, + "logps/rejected": -0.8170815110206604, + "loss": 1.3806, + "rewards/accuracies": 0.484375, + "rewards/chosen": -1.5637797117233276, + "rewards/margins": 0.07038339227437973, + "rewards/rejected": -1.6341630220413208, + "step": 4 + }, + { + "epoch": 0.010571522960026428, + "grad_norm": 0.64655601978302, + "learning_rate": 1.0416666666666667e-07, + "logits/chosen": -0.376886248588562, + "logits/rejected": -0.3516141474246979, + "logps/chosen": -0.8814125061035156, + "logps/rejected": -1.0214396715164185, + "loss": 1.2741, + "rewards/accuracies": 0.625, + "rewards/chosen": -1.7628250122070312, + "rewards/margins": 0.28005433082580566, + "rewards/rejected": -2.042879343032837, + "step": 5 + }, + { + "epoch": 0.012685827552031714, + "grad_norm": 0.4775894582271576, + "learning_rate": 1.25e-07, + "logits/chosen": -0.4757865369319916, + "logits/rejected": -0.4498941898345947, + "logps/chosen": -0.8962199687957764, + "logps/rejected": -0.9462199807167053, + "loss": 1.364, + "rewards/accuracies": 0.5625, + "rewards/chosen": -1.7924399375915527, + "rewards/margins": 0.10000008344650269, + "rewards/rejected": -1.8924399614334106, + "step": 6 + }, + { + "epoch": 0.014800132144037, + "grad_norm": 1.2459568977355957, + "learning_rate": 1.4583333333333335e-07, + "logits/chosen": -0.38895344734191895, + "logits/rejected": -0.38165366649627686, + "logps/chosen": -0.9025766253471375, + "logps/rejected": -0.9465017318725586, + "loss": 1.3898, + "rewards/accuracies": 0.59375, + "rewards/chosen": -1.805153250694275, + "rewards/margins": 0.0878501906991005, + "rewards/rejected": -1.8930034637451172, + "step": 7 + }, + { + "epoch": 0.016914436736042288, + "grad_norm": 0.6195729374885559, + "learning_rate": 1.6666666666666665e-07, + "logits/chosen": -0.3964853286743164, + "logits/rejected": -0.377862811088562, + "logps/chosen": -0.9054160118103027, + "logps/rejected": -0.9605879187583923, + "loss": 1.3821, + "rewards/accuracies": 0.4765625, + "rewards/chosen": -1.8108320236206055, + "rewards/margins": 0.1103438138961792, + "rewards/rejected": -1.9211758375167847, + "step": 8 + }, + { + "epoch": 0.019028741328047574, + "grad_norm": 1.2074137926101685, + "learning_rate": 1.875e-07, + "logits/chosen": -0.3729037344455719, + "logits/rejected": -0.38143450021743774, + "logps/chosen": -0.9328653216362, + "logps/rejected": -0.9905799627304077, + "loss": 1.3754, + "rewards/accuracies": 0.5078125, + "rewards/chosen": -1.8657306432724, + "rewards/margins": 0.11542946100234985, + "rewards/rejected": -1.9811599254608154, + "step": 9 + }, + { + "epoch": 0.021143045920052856, + "grad_norm": 0.2867220640182495, + "learning_rate": 2.0833333333333333e-07, + "logits/chosen": -0.4263336658477783, + "logits/rejected": -0.42903271317481995, + "logps/chosen": -0.8979260325431824, + "logps/rejected": -0.9078099727630615, + "loss": 1.4438, + "rewards/accuracies": 0.515625, + "rewards/chosen": -1.7958520650863647, + "rewards/margins": 0.019767940044403076, + "rewards/rejected": -1.815619945526123, + "step": 10 + }, + { + "epoch": 0.023257350512058142, + "grad_norm": 0.8363026976585388, + "learning_rate": 2.2916666666666663e-07, + "logits/chosen": -0.3374914526939392, + "logits/rejected": -0.32399696111679077, + "logps/chosen": -0.8886098861694336, + "logps/rejected": -0.9484556317329407, + "loss": 1.3422, + "rewards/accuracies": 0.546875, + "rewards/chosen": -1.7772197723388672, + "rewards/margins": 0.11969132721424103, + "rewards/rejected": -1.8969112634658813, + "step": 11 + }, + { + "epoch": 0.025371655104063428, + "grad_norm": 0.5406804084777832, + "learning_rate": 2.5e-07, + "logits/chosen": -0.42844679951667786, + "logits/rejected": -0.37984615564346313, + "logps/chosen": -0.861629843711853, + "logps/rejected": -0.8968492150306702, + "loss": 1.3922, + "rewards/accuracies": 0.5, + "rewards/chosen": -1.723259687423706, + "rewards/margins": 0.07043875753879547, + "rewards/rejected": -1.7936984300613403, + "step": 12 + }, + { + "epoch": 0.027485959696068714, + "grad_norm": 0.9919329285621643, + "learning_rate": 2.708333333333333e-07, + "logits/chosen": -0.36495402455329895, + "logits/rejected": -0.3249490261077881, + "logps/chosen": -0.8502095937728882, + "logps/rejected": -0.8470643758773804, + "loss": 1.4334, + "rewards/accuracies": 0.5078125, + "rewards/chosen": -1.7004191875457764, + "rewards/margins": -0.006290358491241932, + "rewards/rejected": -1.6941287517547607, + "step": 13 + }, + { + "epoch": 0.029600264288074, + "grad_norm": 0.5477162003517151, + "learning_rate": 2.916666666666667e-07, + "logits/chosen": -0.4155704081058502, + "logits/rejected": -0.39535820484161377, + "logps/chosen": -1.0430240631103516, + "logps/rejected": -1.1318373680114746, + "loss": 1.3533, + "rewards/accuracies": 0.6015625, + "rewards/chosen": -2.086048126220703, + "rewards/margins": 0.17762640118598938, + "rewards/rejected": -2.263674736022949, + "step": 14 + }, + { + "epoch": 0.031714568880079286, + "grad_norm": 0.26530712842941284, + "learning_rate": 3.1249999999999997e-07, + "logits/chosen": -0.4810572564601898, + "logits/rejected": -0.42454615235328674, + "logps/chosen": -0.8741041421890259, + "logps/rejected": -0.9494178295135498, + "loss": 1.3655, + "rewards/accuracies": 0.5, + "rewards/chosen": -1.7482082843780518, + "rewards/margins": 0.15062758326530457, + "rewards/rejected": -1.8988356590270996, + "step": 15 + }, + { + "epoch": 0.033828873472084575, + "grad_norm": 0.9272629618644714, + "learning_rate": 3.333333333333333e-07, + "logits/chosen": -0.4440098702907562, + "logits/rejected": -0.3930297791957855, + "logps/chosen": -0.8473359942436218, + "logps/rejected": -0.9369213581085205, + "loss": 1.3248, + "rewards/accuracies": 0.5625, + "rewards/chosen": -1.6946719884872437, + "rewards/margins": 0.17917080223560333, + "rewards/rejected": -1.873842716217041, + "step": 16 + }, + { + "epoch": 0.03594317806408986, + "grad_norm": 0.5912418961524963, + "learning_rate": 3.541666666666667e-07, + "logits/chosen": -0.3838099539279938, + "logits/rejected": -0.3507584035396576, + "logps/chosen": -0.8888350129127502, + "logps/rejected": -0.9361770749092102, + "loss": 1.383, + "rewards/accuracies": 0.4921875, + "rewards/chosen": -1.7776700258255005, + "rewards/margins": 0.0946839451789856, + "rewards/rejected": -1.8723541498184204, + "step": 17 + }, + { + "epoch": 0.03805748265609515, + "grad_norm": 0.6536504030227661, + "learning_rate": 3.75e-07, + "logits/chosen": -0.3581697940826416, + "logits/rejected": -0.3620460629463196, + "logps/chosen": -0.8519617319107056, + "logps/rejected": -0.9022184610366821, + "loss": 1.3841, + "rewards/accuracies": 0.5078125, + "rewards/chosen": -1.7039234638214111, + "rewards/margins": 0.10051343590021133, + "rewards/rejected": -1.8044369220733643, + "step": 18 + }, + { + "epoch": 0.04017178724810043, + "grad_norm": 0.3433632552623749, + "learning_rate": 3.958333333333333e-07, + "logits/chosen": -0.37887442111968994, + "logits/rejected": -0.37543320655822754, + "logps/chosen": -0.9464104175567627, + "logps/rejected": -1.0017329454421997, + "loss": 1.3649, + "rewards/accuracies": 0.5703125, + "rewards/chosen": -1.8928208351135254, + "rewards/margins": 0.11064518243074417, + "rewards/rejected": -2.0034658908843994, + "step": 19 + }, + { + "epoch": 0.04228609184010571, + "grad_norm": 0.9764007329940796, + "learning_rate": 4.1666666666666667e-07, + "logits/chosen": -0.44110679626464844, + "logits/rejected": -0.4280649721622467, + "logps/chosen": -0.9046768546104431, + "logps/rejected": -1.0464633703231812, + "loss": 1.2592, + "rewards/accuracies": 0.625, + "rewards/chosen": -1.8093537092208862, + "rewards/margins": 0.2835729420185089, + "rewards/rejected": -2.0929267406463623, + "step": 20 + }, + { + "epoch": 0.044400396432111, + "grad_norm": 1.8563830852508545, + "learning_rate": 4.375e-07, + "logits/chosen": -0.45183491706848145, + "logits/rejected": -0.42935287952423096, + "logps/chosen": -0.9043138027191162, + "logps/rejected": -0.9462392926216125, + "loss": 1.3784, + "rewards/accuracies": 0.5625, + "rewards/chosen": -1.8086276054382324, + "rewards/margins": 0.08385094255208969, + "rewards/rejected": -1.892478585243225, + "step": 21 + }, + { + "epoch": 0.046514701024116284, + "grad_norm": 1.3473299741744995, + "learning_rate": 4.5833333333333327e-07, + "logits/chosen": -0.37855517864227295, + "logits/rejected": -0.34429043531417847, + "logps/chosen": -0.9284683465957642, + "logps/rejected": -0.9454050064086914, + "loss": 1.4346, + "rewards/accuracies": 0.5234375, + "rewards/chosen": -1.8569366931915283, + "rewards/margins": 0.03387312963604927, + "rewards/rejected": -1.8908100128173828, + "step": 22 + }, + { + "epoch": 0.04862900561612157, + "grad_norm": 0.940831184387207, + "learning_rate": 4.791666666666667e-07, + "logits/chosen": -0.39172160625457764, + "logits/rejected": -0.3695780634880066, + "logps/chosen": -0.9314202666282654, + "logps/rejected": -1.020229697227478, + "loss": 1.3322, + "rewards/accuracies": 0.5703125, + "rewards/chosen": -1.8628405332565308, + "rewards/margins": 0.17761869728565216, + "rewards/rejected": -2.040459394454956, + "step": 23 + }, + { + "epoch": 0.050743310208126856, + "grad_norm": 0.5783158540725708, + "learning_rate": 5e-07, + "logits/chosen": -0.4958629608154297, + "logits/rejected": -0.4257377088069916, + "logps/chosen": -0.9379237294197083, + "logps/rejected": -0.9415461421012878, + "loss": 1.441, + "rewards/accuracies": 0.515625, + "rewards/chosen": -1.8758474588394165, + "rewards/margins": 0.0072449808940291405, + "rewards/rejected": -1.8830922842025757, + "step": 24 + }, + { + "epoch": 0.052857614800132145, + "grad_norm": 1.4209853410720825, + "learning_rate": 5.208333333333334e-07, + "logits/chosen": -0.36407172679901123, + "logits/rejected": -0.3331725299358368, + "logps/chosen": -0.9192589521408081, + "logps/rejected": -0.9595308899879456, + "loss": 1.3994, + "rewards/accuracies": 0.5078125, + "rewards/chosen": -1.8385179042816162, + "rewards/margins": 0.080544114112854, + "rewards/rejected": -1.9190617799758911, + "step": 25 + }, + { + "epoch": 0.05497191939213743, + "grad_norm": 0.6310216188430786, + "learning_rate": 5.416666666666666e-07, + "logits/chosen": -0.41772690415382385, + "logits/rejected": -0.36565953493118286, + "logps/chosen": -0.8052878379821777, + "logps/rejected": -0.8673746585845947, + "loss": 1.3356, + "rewards/accuracies": 0.5859375, + "rewards/chosen": -1.6105756759643555, + "rewards/margins": 0.12417369335889816, + "rewards/rejected": -1.7347493171691895, + "step": 26 + }, + { + "epoch": 0.05708622398414272, + "grad_norm": 1.2933462858200073, + "learning_rate": 5.625e-07, + "logits/chosen": -0.4482795000076294, + "logits/rejected": -0.39409321546554565, + "logps/chosen": -0.8339261412620544, + "logps/rejected": -0.8675202131271362, + "loss": 1.3739, + "rewards/accuracies": 0.4921875, + "rewards/chosen": -1.6678522825241089, + "rewards/margins": 0.06718815863132477, + "rewards/rejected": -1.7350404262542725, + "step": 27 + }, + { + "epoch": 0.059200528576148, + "grad_norm": 0.5808025002479553, + "learning_rate": 5.833333333333334e-07, + "logits/chosen": -0.37116044759750366, + "logits/rejected": -0.3478051722049713, + "logps/chosen": -0.8950318694114685, + "logps/rejected": -0.9756672978401184, + "loss": 1.3505, + "rewards/accuracies": 0.5625, + "rewards/chosen": -1.790063738822937, + "rewards/margins": 0.1612708568572998, + "rewards/rejected": -1.9513345956802368, + "step": 28 + }, + { + "epoch": 0.06131483316815329, + "grad_norm": 1.0569533109664917, + "learning_rate": 6.041666666666666e-07, + "logits/chosen": -0.421148419380188, + "logits/rejected": -0.38443076610565186, + "logps/chosen": -0.8021283745765686, + "logps/rejected": -0.8370179533958435, + "loss": 1.3916, + "rewards/accuracies": 0.46875, + "rewards/chosen": -1.6042567491531372, + "rewards/margins": 0.06977920234203339, + "rewards/rejected": -1.674035906791687, + "step": 29 + }, + { + "epoch": 0.06342913776015857, + "grad_norm": 0.42577147483825684, + "learning_rate": 6.249999999999999e-07, + "logits/chosen": -0.4429818391799927, + "logits/rejected": -0.3524704575538635, + "logps/chosen": -0.8916822671890259, + "logps/rejected": -0.8985542058944702, + "loss": 1.4321, + "rewards/accuracies": 0.5234375, + "rewards/chosen": -1.7833645343780518, + "rewards/margins": 0.01374388113617897, + "rewards/rejected": -1.7971084117889404, + "step": 30 + }, + { + "epoch": 0.06554344235216386, + "grad_norm": 1.0056904554367065, + "learning_rate": 6.458333333333333e-07, + "logits/chosen": -0.376451700925827, + "logits/rejected": -0.342519074678421, + "logps/chosen": -0.9038617014884949, + "logps/rejected": -0.953092634677887, + "loss": 1.398, + "rewards/accuracies": 0.4765625, + "rewards/chosen": -1.8077234029769897, + "rewards/margins": 0.09846188127994537, + "rewards/rejected": -1.906185269355774, + "step": 31 + }, + { + "epoch": 0.06765774694416915, + "grad_norm": 0.5494012236595154, + "learning_rate": 6.666666666666666e-07, + "logits/chosen": -0.3459138870239258, + "logits/rejected": -0.3590989410877228, + "logps/chosen": -0.8274999260902405, + "logps/rejected": -0.8776509761810303, + "loss": 1.363, + "rewards/accuracies": 0.5625, + "rewards/chosen": -1.654999852180481, + "rewards/margins": 0.1003020703792572, + "rewards/rejected": -1.7553019523620605, + "step": 32 + }, + { + "epoch": 0.06977205153617443, + "grad_norm": 0.693267822265625, + "learning_rate": 6.875e-07, + "logits/chosen": -0.40053680539131165, + "logits/rejected": -0.37323904037475586, + "logps/chosen": -0.8255244493484497, + "logps/rejected": -0.8658804893493652, + "loss": 1.3712, + "rewards/accuracies": 0.578125, + "rewards/chosen": -1.6510488986968994, + "rewards/margins": 0.08071210980415344, + "rewards/rejected": -1.7317609786987305, + "step": 33 + }, + { + "epoch": 0.07188635612817972, + "grad_norm": 2.213238000869751, + "learning_rate": 7.083333333333334e-07, + "logits/chosen": -0.40097948908805847, + "logits/rejected": -0.38190510869026184, + "logps/chosen": -0.9122671484947205, + "logps/rejected": -0.9549552798271179, + "loss": 1.36, + "rewards/accuracies": 0.5390625, + "rewards/chosen": -1.824534296989441, + "rewards/margins": 0.0853763073682785, + "rewards/rejected": -1.9099105596542358, + "step": 34 + }, + { + "epoch": 0.074000660720185, + "grad_norm": 0.6859830021858215, + "learning_rate": 7.291666666666666e-07, + "logits/chosen": -0.42501094937324524, + "logits/rejected": -0.42549416422843933, + "logps/chosen": -1.0008373260498047, + "logps/rejected": -1.1157118082046509, + "loss": 1.3294, + "rewards/accuracies": 0.5546875, + "rewards/chosen": -2.0016746520996094, + "rewards/margins": 0.22974897921085358, + "rewards/rejected": -2.2314236164093018, + "step": 35 + }, + { + "epoch": 0.0761149653121903, + "grad_norm": 0.6468721628189087, + "learning_rate": 7.5e-07, + "logits/chosen": -0.36494994163513184, + "logits/rejected": -0.30433908104896545, + "logps/chosen": -0.9062094688415527, + "logps/rejected": -0.920263409614563, + "loss": 1.4312, + "rewards/accuracies": 0.4921875, + "rewards/chosen": -1.8124189376831055, + "rewards/margins": 0.02810765616595745, + "rewards/rejected": -1.840526819229126, + "step": 36 + }, + { + "epoch": 0.07822926990419557, + "grad_norm": 0.5085556507110596, + "learning_rate": 7.708333333333333e-07, + "logits/chosen": -0.4677881598472595, + "logits/rejected": -0.456132709980011, + "logps/chosen": -1.0101865530014038, + "logps/rejected": -1.0429682731628418, + "loss": 1.4132, + "rewards/accuracies": 0.5234375, + "rewards/chosen": -2.0203731060028076, + "rewards/margins": 0.06556359678506851, + "rewards/rejected": -2.0859365463256836, + "step": 37 + }, + { + "epoch": 0.08034357449620086, + "grad_norm": 0.23813335597515106, + "learning_rate": 7.916666666666666e-07, + "logits/chosen": -0.3991190791130066, + "logits/rejected": -0.3664044141769409, + "logps/chosen": -0.9578174352645874, + "logps/rejected": -0.9229263067245483, + "loss": 1.4824, + "rewards/accuracies": 0.4609375, + "rewards/chosen": -1.9156348705291748, + "rewards/margins": -0.06978224962949753, + "rewards/rejected": -1.8458526134490967, + "step": 38 + }, + { + "epoch": 0.08245787908820615, + "grad_norm": 0.587037980556488, + "learning_rate": 8.125e-07, + "logits/chosen": -0.37554049491882324, + "logits/rejected": -0.36305734515190125, + "logps/chosen": -0.8503091931343079, + "logps/rejected": -0.864615261554718, + "loss": 1.4086, + "rewards/accuracies": 0.4296875, + "rewards/chosen": -1.7006183862686157, + "rewards/margins": 0.028611989691853523, + "rewards/rejected": -1.729230523109436, + "step": 39 + }, + { + "epoch": 0.08457218368021142, + "grad_norm": 0.4172501862049103, + "learning_rate": 8.333333333333333e-07, + "logits/chosen": -0.4405443072319031, + "logits/rejected": -0.41723060607910156, + "logps/chosen": -0.8502858877182007, + "logps/rejected": -0.9114271402359009, + "loss": 1.3446, + "rewards/accuracies": 0.546875, + "rewards/chosen": -1.7005717754364014, + "rewards/margins": 0.12228240817785263, + "rewards/rejected": -1.8228542804718018, + "step": 40 + }, + { + "epoch": 0.08668648827221671, + "grad_norm": 0.9275372624397278, + "learning_rate": 8.541666666666666e-07, + "logits/chosen": -0.4200601577758789, + "logits/rejected": -0.3478623628616333, + "logps/chosen": -0.892408013343811, + "logps/rejected": -0.9276402592658997, + "loss": 1.3887, + "rewards/accuracies": 0.46875, + "rewards/chosen": -1.784816026687622, + "rewards/margins": 0.07046431303024292, + "rewards/rejected": -1.8552805185317993, + "step": 41 + }, + { + "epoch": 0.088800792864222, + "grad_norm": 0.7317383289337158, + "learning_rate": 8.75e-07, + "logits/chosen": -0.37675267457962036, + "logits/rejected": -0.33540332317352295, + "logps/chosen": -0.7866061925888062, + "logps/rejected": -0.824250340461731, + "loss": 1.3837, + "rewards/accuracies": 0.5078125, + "rewards/chosen": -1.5732123851776123, + "rewards/margins": 0.07528844475746155, + "rewards/rejected": -1.648500680923462, + "step": 42 + }, + { + "epoch": 0.09091509745622729, + "grad_norm": 0.9452736973762512, + "learning_rate": 8.958333333333334e-07, + "logits/chosen": -0.4662383198738098, + "logits/rejected": -0.4447881579399109, + "logps/chosen": -0.9490666389465332, + "logps/rejected": -1.0112388134002686, + "loss": 1.3412, + "rewards/accuracies": 0.5390625, + "rewards/chosen": -1.8981332778930664, + "rewards/margins": 0.12434446066617966, + "rewards/rejected": -2.022477626800537, + "step": 43 + }, + { + "epoch": 0.09302940204823257, + "grad_norm": 0.2848323881626129, + "learning_rate": 9.166666666666665e-07, + "logits/chosen": -0.41404005885124207, + "logits/rejected": -0.3944583535194397, + "logps/chosen": -0.8224930167198181, + "logps/rejected": -0.8416361808776855, + "loss": 1.4027, + "rewards/accuracies": 0.609375, + "rewards/chosen": -1.6449860334396362, + "rewards/margins": 0.038286346942186356, + "rewards/rejected": -1.683272361755371, + "step": 44 + }, + { + "epoch": 0.09514370664023786, + "grad_norm": 0.7165678143501282, + "learning_rate": 9.374999999999999e-07, + "logits/chosen": -0.40475326776504517, + "logits/rejected": -0.3559921383857727, + "logps/chosen": -0.8070214986801147, + "logps/rejected": -0.8993593454360962, + "loss": 1.3148, + "rewards/accuracies": 0.53125, + "rewards/chosen": -1.6140429973602295, + "rewards/margins": 0.18467575311660767, + "rewards/rejected": -1.7987186908721924, + "step": 45 + }, + { + "epoch": 0.09725801123224315, + "grad_norm": 0.4779021739959717, + "learning_rate": 9.583333333333334e-07, + "logits/chosen": -0.4171525835990906, + "logits/rejected": -0.42166149616241455, + "logps/chosen": -0.7872560024261475, + "logps/rejected": -0.8496187925338745, + "loss": 1.3356, + "rewards/accuracies": 0.5546875, + "rewards/chosen": -1.574512004852295, + "rewards/margins": 0.12472567707300186, + "rewards/rejected": -1.699237585067749, + "step": 46 + }, + { + "epoch": 0.09937231582424844, + "grad_norm": 0.7870219349861145, + "learning_rate": 9.791666666666667e-07, + "logits/chosen": -0.3734116554260254, + "logits/rejected": -0.32778748869895935, + "logps/chosen": -0.7842286825180054, + "logps/rejected": -0.8161548972129822, + "loss": 1.3647, + "rewards/accuracies": 0.5078125, + "rewards/chosen": -1.5684573650360107, + "rewards/margins": 0.06385258585214615, + "rewards/rejected": -1.6323097944259644, + "step": 47 + }, + { + "epoch": 0.10148662041625371, + "grad_norm": 0.2597256600856781, + "learning_rate": 1e-06, + "logits/chosen": -0.4355677664279938, + "logits/rejected": -0.38983187079429626, + "logps/chosen": -0.8787693977355957, + "logps/rejected": -0.9383041262626648, + "loss": 1.35, + "rewards/accuracies": 0.5546875, + "rewards/chosen": -1.7575387954711914, + "rewards/margins": 0.11906948685646057, + "rewards/rejected": -1.8766082525253296, + "step": 48 + }, + { + "epoch": 0.103600925008259, + "grad_norm": 0.9942799210548401, + "learning_rate": 9.999862751990697e-07, + "logits/chosen": -0.4244321882724762, + "logits/rejected": -0.4366786777973175, + "logps/chosen": -0.7910157442092896, + "logps/rejected": -0.8630884885787964, + "loss": 1.3166, + "rewards/accuracies": 0.5546875, + "rewards/chosen": -1.582031488418579, + "rewards/margins": 0.14414538443088531, + "rewards/rejected": -1.7261769771575928, + "step": 49 + }, + { + "epoch": 0.10571522960026429, + "grad_norm": 0.5333903431892395, + "learning_rate": 9.999451015497595e-07, + "logits/chosen": -0.389942467212677, + "logits/rejected": -0.36674585938453674, + "logps/chosen": -0.7312074899673462, + "logps/rejected": -0.7289648652076721, + "loss": 1.4225, + "rewards/accuracies": 0.4765625, + "rewards/chosen": -1.4624149799346924, + "rewards/margins": -0.004485193639993668, + "rewards/rejected": -1.4579297304153442, + "step": 50 + }, + { + "epoch": 0.10782953419226958, + "grad_norm": 0.5712242722511292, + "learning_rate": 9.9987648131247e-07, + "logits/chosen": -0.4622853994369507, + "logits/rejected": -0.3728552460670471, + "logps/chosen": -0.8764299750328064, + "logps/rejected": -0.869678795337677, + "loss": 1.4542, + "rewards/accuracies": 0.53125, + "rewards/chosen": -1.7528599500656128, + "rewards/margins": -0.013502337038516998, + "rewards/rejected": -1.739357590675354, + "step": 51 + }, + { + "epoch": 0.10994383878427486, + "grad_norm": 0.2586441934108734, + "learning_rate": 9.99780418254397e-07, + "logits/chosen": -0.37249019742012024, + "logits/rejected": -0.3998304605484009, + "logps/chosen": -0.8435611724853516, + "logps/rejected": -0.9359882473945618, + "loss": 1.3057, + "rewards/accuracies": 0.625, + "rewards/chosen": -1.6871223449707031, + "rewards/margins": 0.18485431373119354, + "rewards/rejected": -1.8719764947891235, + "step": 52 + }, + { + "epoch": 0.11205814337628014, + "grad_norm": 1.0829113721847534, + "learning_rate": 9.996569176493268e-07, + "logits/chosen": -0.47697725892066956, + "logits/rejected": -0.4208195209503174, + "logps/chosen": -0.8014968037605286, + "logps/rejected": -0.8703804612159729, + "loss": 1.3523, + "rewards/accuracies": 0.5703125, + "rewards/chosen": -1.6029936075210571, + "rewards/margins": 0.1377674788236618, + "rewards/rejected": -1.7407609224319458, + "step": 53 + }, + { + "epoch": 0.11417244796828543, + "grad_norm": 0.5523208379745483, + "learning_rate": 9.995059862773438e-07, + "logits/chosen": -0.40533363819122314, + "logits/rejected": -0.36801978945732117, + "logps/chosen": -0.7641825675964355, + "logps/rejected": -0.8168596029281616, + "loss": 1.3692, + "rewards/accuracies": 0.53125, + "rewards/chosen": -1.528365135192871, + "rewards/margins": 0.10535416752099991, + "rewards/rejected": -1.6337192058563232, + "step": 54 + }, + { + "epoch": 0.11628675256029072, + "grad_norm": 0.614101767539978, + "learning_rate": 9.993276324244605e-07, + "logits/chosen": -0.4476906955242157, + "logits/rejected": -0.40396648645401, + "logps/chosen": -0.8706808090209961, + "logps/rejected": -0.9221430420875549, + "loss": 1.3787, + "rewards/accuracies": 0.515625, + "rewards/chosen": -1.7413616180419922, + "rewards/margins": 0.10292442888021469, + "rewards/rejected": -1.8442860841751099, + "step": 55 + }, + { + "epoch": 0.118401057152296, + "grad_norm": 0.3428778052330017, + "learning_rate": 9.991218658821608e-07, + "logits/chosen": -0.31709593534469604, + "logits/rejected": -0.2760937213897705, + "logps/chosen": -0.842248797416687, + "logps/rejected": -0.8068034648895264, + "loss": 1.498, + "rewards/accuracies": 0.4375, + "rewards/chosen": -1.684497594833374, + "rewards/margins": -0.07089066505432129, + "rewards/rejected": -1.6136069297790527, + "step": 56 + }, + { + "epoch": 0.12051536174430129, + "grad_norm": 0.6877723932266235, + "learning_rate": 9.988886979468643e-07, + "logits/chosen": -0.41800016164779663, + "logits/rejected": -0.4011584222316742, + "logps/chosen": -0.7845420837402344, + "logps/rejected": -0.834447979927063, + "loss": 1.3491, + "rewards/accuracies": 0.546875, + "rewards/chosen": -1.5690841674804688, + "rewards/margins": 0.09981165081262589, + "rewards/rejected": -1.668895959854126, + "step": 57 + }, + { + "epoch": 0.12262966633630658, + "grad_norm": 0.9649701714515686, + "learning_rate": 9.98628141419305e-07, + "logits/chosen": -0.4253537058830261, + "logits/rejected": -0.4305458962917328, + "logps/chosen": -0.86476731300354, + "logps/rejected": -0.9080386161804199, + "loss": 1.3639, + "rewards/accuracies": 0.515625, + "rewards/chosen": -1.72953462600708, + "rewards/margins": 0.08654248714447021, + "rewards/rejected": -1.8160772323608398, + "step": 58 + }, + { + "epoch": 0.12474397092831185, + "grad_norm": 1.3779780864715576, + "learning_rate": 9.98340210603829e-07, + "logits/chosen": -0.39970022439956665, + "logits/rejected": -0.441428005695343, + "logps/chosen": -0.8662775158882141, + "logps/rejected": -0.9646260738372803, + "loss": 1.3001, + "rewards/accuracies": 0.6484375, + "rewards/chosen": -1.7325550317764282, + "rewards/margins": 0.19669723510742188, + "rewards/rejected": -1.9292521476745605, + "step": 59 + }, + { + "epoch": 0.12685827552031714, + "grad_norm": 0.5366966724395752, + "learning_rate": 9.980249213076084e-07, + "logits/chosen": -0.37770116329193115, + "logits/rejected": -0.35231757164001465, + "logps/chosen": -0.8165755867958069, + "logps/rejected": -0.8619179129600525, + "loss": 1.3699, + "rewards/accuracies": 0.578125, + "rewards/chosen": -1.6331511735916138, + "rewards/margins": 0.09068439900875092, + "rewards/rejected": -1.723835825920105, + "step": 60 + }, + { + "epoch": 0.12897258011232243, + "grad_norm": 0.36810922622680664, + "learning_rate": 9.976822908397748e-07, + "logits/chosen": -0.4224976897239685, + "logits/rejected": -0.41758257150650024, + "logps/chosen": -0.8445641994476318, + "logps/rejected": -0.9393664598464966, + "loss": 1.3193, + "rewards/accuracies": 0.5390625, + "rewards/chosen": -1.6891283988952637, + "rewards/margins": 0.18960458040237427, + "rewards/rejected": -1.8787329196929932, + "step": 61 + }, + { + "epoch": 0.13108688470432772, + "grad_norm": 0.6838279366493225, + "learning_rate": 9.97312338010468e-07, + "logits/chosen": -0.4168627858161926, + "logits/rejected": -0.36115381121635437, + "logps/chosen": -0.8370552659034729, + "logps/rejected": -0.8352169394493103, + "loss": 1.4284, + "rewards/accuracies": 0.5703125, + "rewards/chosen": -1.6741105318069458, + "rewards/margins": -0.0036766715347766876, + "rewards/rejected": -1.6704338788986206, + "step": 62 + }, + { + "epoch": 0.133201189296333, + "grad_norm": 0.39330533146858215, + "learning_rate": 9.969150831298037e-07, + "logits/chosen": -0.4558233618736267, + "logits/rejected": -0.4025765061378479, + "logps/chosen": -0.826255738735199, + "logps/rejected": -0.894213080406189, + "loss": 1.3485, + "rewards/accuracies": 0.5, + "rewards/chosen": -1.652511477470398, + "rewards/margins": 0.13591471314430237, + "rewards/rejected": -1.788426160812378, + "step": 63 + }, + { + "epoch": 0.1353154938883383, + "grad_norm": 0.6055929660797119, + "learning_rate": 9.964905480067584e-07, + "logits/chosen": -0.459463506937027, + "logits/rejected": -0.42943331599235535, + "logps/chosen": -0.7901928424835205, + "logps/rejected": -0.7964221239089966, + "loss": 1.4057, + "rewards/accuracies": 0.484375, + "rewards/chosen": -1.580385684967041, + "rewards/margins": 0.012458762153983116, + "rewards/rejected": -1.5928442478179932, + "step": 64 + }, + { + "epoch": 0.13742979848034356, + "grad_norm": 0.37883859872817993, + "learning_rate": 9.960387559479725e-07, + "logits/chosen": -0.4447207450866699, + "logits/rejected": -0.371269553899765, + "logps/chosen": -0.7863065004348755, + "logps/rejected": -0.7983666658401489, + "loss": 1.4202, + "rewards/accuracies": 0.4609375, + "rewards/chosen": -1.572613000869751, + "rewards/margins": 0.02412020042538643, + "rewards/rejected": -1.5967333316802979, + "step": 65 + }, + { + "epoch": 0.13954410307234885, + "grad_norm": 0.31330156326293945, + "learning_rate": 9.955597317564703e-07, + "logits/chosen": -0.42059677839279175, + "logits/rejected": -0.37605100870132446, + "logps/chosen": -0.7669360637664795, + "logps/rejected": -0.8348797559738159, + "loss": 1.3368, + "rewards/accuracies": 0.5390625, + "rewards/chosen": -1.533872127532959, + "rewards/margins": 0.13588732481002808, + "rewards/rejected": -1.6697595119476318, + "step": 66 + }, + { + "epoch": 0.14165840766435414, + "grad_norm": 0.4353170096874237, + "learning_rate": 9.950535017302983e-07, + "logits/chosen": -0.3897082805633545, + "logits/rejected": -0.38229796290397644, + "logps/chosen": -0.7249190807342529, + "logps/rejected": -0.7696882486343384, + "loss": 1.3511, + "rewards/accuracies": 0.5546875, + "rewards/chosen": -1.4498381614685059, + "rewards/margins": 0.08953814208507538, + "rewards/rejected": -1.5393764972686768, + "step": 67 + }, + { + "epoch": 0.14377271225635943, + "grad_norm": 0.6724106669425964, + "learning_rate": 9.94520093661082e-07, + "logits/chosen": -0.3687596023082733, + "logits/rejected": -0.34222811460494995, + "logps/chosen": -0.7845972776412964, + "logps/rejected": -0.8308086395263672, + "loss": 1.3737, + "rewards/accuracies": 0.5, + "rewards/chosen": -1.5691945552825928, + "rewards/margins": 0.09242270141839981, + "rewards/rejected": -1.6616172790527344, + "step": 68 + }, + { + "epoch": 0.14588701684836472, + "grad_norm": 0.7312172651290894, + "learning_rate": 9.939595368324994e-07, + "logits/chosen": -0.4475817382335663, + "logits/rejected": -0.3975730538368225, + "logps/chosen": -0.7314785718917847, + "logps/rejected": -0.7924487590789795, + "loss": 1.3439, + "rewards/accuracies": 0.5390625, + "rewards/chosen": -1.4629571437835693, + "rewards/margins": 0.12194043397903442, + "rewards/rejected": -1.584897518157959, + "step": 69 + }, + { + "epoch": 0.14800132144037, + "grad_norm": 0.301097571849823, + "learning_rate": 9.933718620186744e-07, + "logits/chosen": -0.402032732963562, + "logits/rejected": -0.3640722632408142, + "logps/chosen": -0.7727882862091064, + "logps/rejected": -0.8291516304016113, + "loss": 1.358, + "rewards/accuracies": 0.5546875, + "rewards/chosen": -1.545576572418213, + "rewards/margins": 0.11272668838500977, + "rewards/rejected": -1.6583032608032227, + "step": 70 + }, + { + "epoch": 0.1501156260323753, + "grad_norm": 0.377835750579834, + "learning_rate": 9.92757101482486e-07, + "logits/chosen": -0.316825270652771, + "logits/rejected": -0.3245603144168854, + "logps/chosen": -0.7962774634361267, + "logps/rejected": -0.8610175848007202, + "loss": 1.3464, + "rewards/accuracies": 0.5859375, + "rewards/chosen": -1.5925549268722534, + "rewards/margins": 0.12948019802570343, + "rewards/rejected": -1.7220351696014404, + "step": 71 + }, + { + "epoch": 0.1522299306243806, + "grad_norm": 0.84058678150177, + "learning_rate": 9.921152889737984e-07, + "logits/chosen": -0.4446060359477997, + "logits/rejected": -0.43160340189933777, + "logps/chosen": -0.7745426297187805, + "logps/rejected": -0.8286185264587402, + "loss": 1.3408, + "rewards/accuracies": 0.5703125, + "rewards/chosen": -1.549085259437561, + "rewards/margins": 0.10815180093050003, + "rewards/rejected": -1.6572370529174805, + "step": 72 + }, + { + "epoch": 0.15434423521638585, + "grad_norm": 0.6970808506011963, + "learning_rate": 9.91446459727607e-07, + "logits/chosen": -0.4220297634601593, + "logits/rejected": -0.404453307390213, + "logps/chosen": -0.8769615888595581, + "logps/rejected": -0.9564313292503357, + "loss": 1.3423, + "rewards/accuracies": 0.6328125, + "rewards/chosen": -1.7539231777191162, + "rewards/margins": 0.15893957018852234, + "rewards/rejected": -1.9128626585006714, + "step": 73 + }, + { + "epoch": 0.15645853980839114, + "grad_norm": 0.5900676250457764, + "learning_rate": 9.90750650462105e-07, + "logits/chosen": -0.41884100437164307, + "logits/rejected": -0.38551777601242065, + "logps/chosen": -0.814996063709259, + "logps/rejected": -0.8892688751220703, + "loss": 1.3325, + "rewards/accuracies": 0.5546875, + "rewards/chosen": -1.629992127418518, + "rewards/margins": 0.14854571223258972, + "rewards/rejected": -1.7785377502441406, + "step": 74 + }, + { + "epoch": 0.15857284440039643, + "grad_norm": 0.7245749831199646, + "learning_rate": 9.900278993766668e-07, + "logits/chosen": -0.3451727330684662, + "logits/rejected": -0.3348972201347351, + "logps/chosen": -0.8788102865219116, + "logps/rejected": -0.9273182153701782, + "loss": 1.377, + "rewards/accuracies": 0.5078125, + "rewards/chosen": -1.7576205730438232, + "rewards/margins": 0.09701582789421082, + "rewards/rejected": -1.8546364307403564, + "step": 75 + }, + { + "epoch": 0.16068714899240172, + "grad_norm": 0.3135383725166321, + "learning_rate": 9.89278246149752e-07, + "logits/chosen": -0.4140404760837555, + "logits/rejected": -0.38082340359687805, + "logps/chosen": -0.7513999342918396, + "logps/rejected": -0.7880118489265442, + "loss": 1.3921, + "rewards/accuracies": 0.53125, + "rewards/chosen": -1.5027998685836792, + "rewards/margins": 0.07322371751070023, + "rewards/rejected": -1.5760236978530884, + "step": 76 + }, + { + "epoch": 0.162801453584407, + "grad_norm": 0.8594076633453369, + "learning_rate": 9.885017319367252e-07, + "logits/chosen": -0.35951656103134155, + "logits/rejected": -0.30456626415252686, + "logps/chosen": -0.7989844679832458, + "logps/rejected": -0.8145395517349243, + "loss": 1.4061, + "rewards/accuracies": 0.4765625, + "rewards/chosen": -1.5979689359664917, + "rewards/margins": 0.031110182404518127, + "rewards/rejected": -1.6290791034698486, + "step": 77 + }, + { + "epoch": 0.1649157581764123, + "grad_norm": 0.4393538534641266, + "learning_rate": 9.876983993675989e-07, + "logits/chosen": -0.33119240403175354, + "logits/rejected": -0.31970253586769104, + "logps/chosen": -0.721772313117981, + "logps/rejected": -0.7868390679359436, + "loss": 1.3325, + "rewards/accuracies": 0.5390625, + "rewards/chosen": -1.443544626235962, + "rewards/margins": 0.13013358414173126, + "rewards/rejected": -1.5736781358718872, + "step": 78 + }, + { + "epoch": 0.1670300627684176, + "grad_norm": 0.8017925024032593, + "learning_rate": 9.868682925446909e-07, + "logits/chosen": -0.3608989417552948, + "logits/rejected": -0.3497124910354614, + "logps/chosen": -0.8117240071296692, + "logps/rejected": -0.8506529331207275, + "loss": 1.3671, + "rewards/accuracies": 0.53125, + "rewards/chosen": -1.6234480142593384, + "rewards/margins": 0.07785768806934357, + "rewards/rejected": -1.701305866241455, + "step": 79 + }, + { + "epoch": 0.16914436736042285, + "grad_norm": 0.5704994201660156, + "learning_rate": 9.860114570402054e-07, + "logits/chosen": -0.37441548705101013, + "logits/rejected": -0.32188406586647034, + "logps/chosen": -0.745419442653656, + "logps/rejected": -0.816170871257782, + "loss": 1.3315, + "rewards/accuracies": 0.484375, + "rewards/chosen": -1.490838885307312, + "rewards/margins": 0.1415030062198639, + "rewards/rejected": -1.632341742515564, + "step": 80 + }, + { + "epoch": 0.17125867195242814, + "grad_norm": 3.0005106925964355, + "learning_rate": 9.85127939893729e-07, + "logits/chosen": -0.34791454672813416, + "logits/rejected": -0.32542383670806885, + "logps/chosen": -0.7547991275787354, + "logps/rejected": -0.7868378162384033, + "loss": 1.3742, + "rewards/accuracies": 0.515625, + "rewards/chosen": -1.5095982551574707, + "rewards/margins": 0.06407731771469116, + "rewards/rejected": -1.5736756324768066, + "step": 81 + }, + { + "epoch": 0.17337297654443343, + "grad_norm": 0.5891271829605103, + "learning_rate": 9.842177896096493e-07, + "logits/chosen": -0.38649702072143555, + "logits/rejected": -0.36892226338386536, + "logps/chosen": -0.7556143999099731, + "logps/rejected": -0.82858806848526, + "loss": 1.3233, + "rewards/accuracies": 0.5703125, + "rewards/chosen": -1.5112287998199463, + "rewards/margins": 0.14594702422618866, + "rewards/rejected": -1.65717613697052, + "step": 82 + }, + { + "epoch": 0.17548728113643872, + "grad_norm": 0.27861157059669495, + "learning_rate": 9.832810561544923e-07, + "logits/chosen": -0.38264670968055725, + "logits/rejected": -0.35908499360084534, + "logps/chosen": -0.7858557105064392, + "logps/rejected": -0.8571599721908569, + "loss": 1.3234, + "rewards/accuracies": 0.578125, + "rewards/chosen": -1.5717114210128784, + "rewards/margins": 0.1426086127758026, + "rewards/rejected": -1.7143199443817139, + "step": 83 + }, + { + "epoch": 0.177601585728444, + "grad_norm": 0.8158763647079468, + "learning_rate": 9.823177909541793e-07, + "logits/chosen": -0.4076104760169983, + "logits/rejected": -0.3934200704097748, + "logps/chosen": -0.8089872002601624, + "logps/rejected": -0.8885407447814941, + "loss": 1.3476, + "rewards/accuracies": 0.5390625, + "rewards/chosen": -1.6179744005203247, + "rewards/margins": 0.15910708904266357, + "rewards/rejected": -1.7770814895629883, + "step": 84 + }, + { + "epoch": 0.1797158903204493, + "grad_norm": 0.46008333563804626, + "learning_rate": 9.813280468912022e-07, + "logits/chosen": -0.33124151825904846, + "logits/rejected": -0.34535717964172363, + "logps/chosen": -0.733020544052124, + "logps/rejected": -0.8716557621955872, + "loss": 1.2807, + "rewards/accuracies": 0.5859375, + "rewards/chosen": -1.466041088104248, + "rewards/margins": 0.27727028727531433, + "rewards/rejected": -1.7433115243911743, + "step": 85 + }, + { + "epoch": 0.18183019491245458, + "grad_norm": 0.3784334659576416, + "learning_rate": 9.80311878301722e-07, + "logits/chosen": -0.40713849663734436, + "logits/rejected": -0.3808574080467224, + "logps/chosen": -0.7063947319984436, + "logps/rejected": -0.7589148879051208, + "loss": 1.3501, + "rewards/accuracies": 0.515625, + "rewards/chosen": -1.4127894639968872, + "rewards/margins": 0.10504024475812912, + "rewards/rejected": -1.5178297758102417, + "step": 86 + }, + { + "epoch": 0.18394449950445987, + "grad_norm": 0.6137813329696655, + "learning_rate": 9.792693409725853e-07, + "logits/chosen": -0.4119255542755127, + "logits/rejected": -0.44221603870391846, + "logps/chosen": -0.795850932598114, + "logps/rejected": -0.8925026059150696, + "loss": 1.2987, + "rewards/accuracies": 0.5859375, + "rewards/chosen": -1.591701865196228, + "rewards/margins": 0.19330324232578278, + "rewards/rejected": -1.7850052118301392, + "step": 87 + }, + { + "epoch": 0.18605880409646514, + "grad_norm": 0.5354250073432922, + "learning_rate": 9.78200492138261e-07, + "logits/chosen": -0.3792279064655304, + "logits/rejected": -0.3789527714252472, + "logps/chosen": -0.7249161005020142, + "logps/rejected": -0.8088154792785645, + "loss": 1.3304, + "rewards/accuracies": 0.515625, + "rewards/chosen": -1.4498322010040283, + "rewards/margins": 0.16779886186122894, + "rewards/rejected": -1.617630958557129, + "step": 88 + }, + { + "epoch": 0.18817310868847043, + "grad_norm": 0.6911243796348572, + "learning_rate": 9.771053904776995e-07, + "logits/chosen": -0.38837429881095886, + "logits/rejected": -0.36597418785095215, + "logps/chosen": -0.7528612017631531, + "logps/rejected": -0.7981135249137878, + "loss": 1.3481, + "rewards/accuracies": 0.6171875, + "rewards/chosen": -1.5057224035263062, + "rewards/margins": 0.09050464630126953, + "rewards/rejected": -1.5962270498275757, + "step": 89 + }, + { + "epoch": 0.19028741328047571, + "grad_norm": 0.37110790610313416, + "learning_rate": 9.759840961111097e-07, + "logits/chosen": -0.3804919421672821, + "logits/rejected": -0.38750600814819336, + "logps/chosen": -0.8673248291015625, + "logps/rejected": -0.9381619691848755, + "loss": 1.3303, + "rewards/accuracies": 0.5859375, + "rewards/chosen": -1.734649658203125, + "rewards/margins": 0.14167429506778717, + "rewards/rejected": -1.876323938369751, + "step": 90 + }, + { + "epoch": 0.192401717872481, + "grad_norm": 0.8033086657524109, + "learning_rate": 9.748366705966593e-07, + "logits/chosen": -0.3804866075515747, + "logits/rejected": -0.31055447459220886, + "logps/chosen": -0.7535511255264282, + "logps/rejected": -0.7824290990829468, + "loss": 1.3706, + "rewards/accuracies": 0.546875, + "rewards/chosen": -1.5071022510528564, + "rewards/margins": 0.057755980640649796, + "rewards/rejected": -1.5648581981658936, + "step": 91 + }, + { + "epoch": 0.1945160224644863, + "grad_norm": 0.6934167742729187, + "learning_rate": 9.736631769270957e-07, + "logits/chosen": -0.443461149930954, + "logits/rejected": -0.4398806691169739, + "logps/chosen": -0.8123858571052551, + "logps/rejected": -0.8972252607345581, + "loss": 1.3464, + "rewards/accuracies": 0.578125, + "rewards/chosen": -1.6247717142105103, + "rewards/margins": 0.16967862844467163, + "rewards/rejected": -1.7944505214691162, + "step": 92 + }, + { + "epoch": 0.19663032705649158, + "grad_norm": 1.140067458152771, + "learning_rate": 9.724636795262866e-07, + "logits/chosen": -0.43793433904647827, + "logits/rejected": -0.4402340352535248, + "logps/chosen": -0.8155819177627563, + "logps/rejected": -0.8659977912902832, + "loss": 1.3621, + "rewards/accuracies": 0.5, + "rewards/chosen": -1.6311638355255127, + "rewards/margins": 0.10083187371492386, + "rewards/rejected": -1.7319955825805664, + "step": 93 + }, + { + "epoch": 0.19874463164849687, + "grad_norm": 1.8303897380828857, + "learning_rate": 9.712382442456844e-07, + "logits/chosen": -0.34288379549980164, + "logits/rejected": -0.36632782220840454, + "logps/chosen": -0.7338054776191711, + "logps/rejected": -0.8537961840629578, + "loss": 1.2942, + "rewards/accuracies": 0.5859375, + "rewards/chosen": -1.4676109552383423, + "rewards/margins": 0.23998141288757324, + "rewards/rejected": -1.7075923681259155, + "step": 94 + }, + { + "epoch": 0.20085893624050213, + "grad_norm": 0.34392252564430237, + "learning_rate": 9.6998693836071e-07, + "logits/chosen": -0.4381723999977112, + "logits/rejected": -0.4031081199645996, + "logps/chosen": -0.7130292057991028, + "logps/rejected": -0.7402217388153076, + "loss": 1.3766, + "rewards/accuracies": 0.5859375, + "rewards/chosen": -1.4260584115982056, + "rewards/margins": 0.0543849840760231, + "rewards/rejected": -1.4804434776306152, + "step": 95 + }, + { + "epoch": 0.20297324083250742, + "grad_norm": 0.4129842519760132, + "learning_rate": 9.687098305670604e-07, + "logits/chosen": -0.39796924591064453, + "logits/rejected": -0.3476859927177429, + "logps/chosen": -0.7520885467529297, + "logps/rejected": -0.8058558702468872, + "loss": 1.3663, + "rewards/accuracies": 0.53125, + "rewards/chosen": -1.5041770935058594, + "rewards/margins": 0.10753461718559265, + "rewards/rejected": -1.6117117404937744, + "step": 96 + }, + { + "epoch": 0.2050875454245127, + "grad_norm": 0.5054985284805298, + "learning_rate": 9.674069909769362e-07, + "logits/chosen": -0.3942393660545349, + "logits/rejected": -0.3627544045448303, + "logps/chosen": -0.733702540397644, + "logps/rejected": -0.781308650970459, + "loss": 1.342, + "rewards/accuracies": 0.5859375, + "rewards/chosen": -1.467405080795288, + "rewards/margins": 0.09521210938692093, + "rewards/rejected": -1.562617301940918, + "step": 97 + }, + { + "epoch": 0.207201850016518, + "grad_norm": 0.6975870728492737, + "learning_rate": 9.66078491115194e-07, + "logits/chosen": -0.38557127118110657, + "logits/rejected": -0.3581204414367676, + "logps/chosen": -0.7359838485717773, + "logps/rejected": -0.7648134827613831, + "loss": 1.3841, + "rewards/accuracies": 0.5234375, + "rewards/chosen": -1.4719676971435547, + "rewards/margins": 0.057659298181533813, + "rewards/rejected": -1.5296269655227661, + "step": 98 + }, + { + "epoch": 0.2093161546085233, + "grad_norm": 0.7575029730796814, + "learning_rate": 9.647244039154177e-07, + "logits/chosen": -0.3871467411518097, + "logits/rejected": -0.3941374123096466, + "logps/chosen": -0.6516871452331543, + "logps/rejected": -0.7066073417663574, + "loss": 1.3364, + "rewards/accuracies": 0.5390625, + "rewards/chosen": -1.3033742904663086, + "rewards/margins": 0.10984040796756744, + "rewards/rejected": -1.4132146835327148, + "step": 99 + }, + { + "epoch": 0.21143045920052858, + "grad_norm": 1.3344250917434692, + "learning_rate": 9.633448037159166e-07, + "logits/chosen": -0.40887755155563354, + "logits/rejected": -0.41733911633491516, + "logps/chosen": -0.6978950500488281, + "logps/rejected": -0.793424129486084, + "loss": 1.3076, + "rewards/accuracies": 0.609375, + "rewards/chosen": -1.3957901000976562, + "rewards/margins": 0.1910584717988968, + "rewards/rejected": -1.586848258972168, + "step": 100 + }, + { + "epoch": 0.21354476379253387, + "grad_norm": 0.8798456788063049, + "learning_rate": 9.619397662556433e-07, + "logits/chosen": -0.302534282207489, + "logits/rejected": -0.29954588413238525, + "logps/chosen": -0.719552755355835, + "logps/rejected": -0.7628123164176941, + "loss": 1.3699, + "rewards/accuracies": 0.5390625, + "rewards/chosen": -1.43910551071167, + "rewards/margins": 0.08651915192604065, + "rewards/rejected": -1.5256246328353882, + "step": 101 + }, + { + "epoch": 0.21565906838453916, + "grad_norm": 0.8746365308761597, + "learning_rate": 9.605093686700353e-07, + "logits/chosen": -0.372263640165329, + "logits/rejected": -0.3714321255683899, + "logps/chosen": -0.6665956974029541, + "logps/rejected": -0.7361368536949158, + "loss": 1.3173, + "rewards/accuracies": 0.5546875, + "rewards/chosen": -1.3331913948059082, + "rewards/margins": 0.1390824019908905, + "rewards/rejected": -1.4722737073898315, + "step": 102 + }, + { + "epoch": 0.21777337297654442, + "grad_norm": 0.8450930714607239, + "learning_rate": 9.590536894867812e-07, + "logits/chosen": -0.37228280305862427, + "logits/rejected": -0.37763556838035583, + "logps/chosen": -0.7425979375839233, + "logps/rejected": -0.7557005882263184, + "loss": 1.4085, + "rewards/accuracies": 0.5390625, + "rewards/chosen": -1.4851958751678467, + "rewards/margins": 0.026205357164144516, + "rewards/rejected": -1.5114011764526367, + "step": 103 + }, + { + "epoch": 0.2198876775685497, + "grad_norm": 0.5075035691261292, + "learning_rate": 9.575728086215091e-07, + "logits/chosen": -0.4433964788913727, + "logits/rejected": -0.3782787024974823, + "logps/chosen": -0.7308244109153748, + "logps/rejected": -0.8043883442878723, + "loss": 1.3353, + "rewards/accuracies": 0.5703125, + "rewards/chosen": -1.4616488218307495, + "rewards/margins": 0.14712783694267273, + "rewards/rejected": -1.6087766885757446, + "step": 104 + }, + { + "epoch": 0.222001982160555, + "grad_norm": 1.0270946025848389, + "learning_rate": 9.560668073733993e-07, + "logits/chosen": -0.3593980073928833, + "logits/rejected": -0.3159312903881073, + "logps/chosen": -0.757469892501831, + "logps/rejected": -0.8256179094314575, + "loss": 1.3289, + "rewards/accuracies": 0.5546875, + "rewards/chosen": -1.514939785003662, + "rewards/margins": 0.13629598915576935, + "rewards/rejected": -1.651235818862915, + "step": 105 + }, + { + "epoch": 0.2241162867525603, + "grad_norm": 0.8390078544616699, + "learning_rate": 9.54535768420721e-07, + "logits/chosen": -0.3266332149505615, + "logits/rejected": -0.3008713722229004, + "logps/chosen": -0.7286102771759033, + "logps/rejected": -0.7803273797035217, + "loss": 1.3593, + "rewards/accuracies": 0.5078125, + "rewards/chosen": -1.4572205543518066, + "rewards/margins": 0.10343428701162338, + "rewards/rejected": -1.5606547594070435, + "step": 106 + }, + { + "epoch": 0.22623059134456558, + "grad_norm": 1.130595088005066, + "learning_rate": 9.529797758162934e-07, + "logits/chosen": -0.36109817028045654, + "logits/rejected": -0.34797021746635437, + "logps/chosen": -0.7723361253738403, + "logps/rejected": -0.8873662352561951, + "loss": 1.2956, + "rewards/accuracies": 0.625, + "rewards/chosen": -1.5446722507476807, + "rewards/margins": 0.23006024956703186, + "rewards/rejected": -1.7747324705123901, + "step": 107 + }, + { + "epoch": 0.22834489593657087, + "grad_norm": 0.9399718642234802, + "learning_rate": 9.513989149828717e-07, + "logits/chosen": -0.3596777021884918, + "logits/rejected": -0.3660539388656616, + "logps/chosen": -0.7130635976791382, + "logps/rejected": -0.7378955483436584, + "loss": 1.3774, + "rewards/accuracies": 0.4765625, + "rewards/chosen": -1.4261271953582764, + "rewards/margins": 0.04966379329562187, + "rewards/rejected": -1.475791096687317, + "step": 108 + }, + { + "epoch": 0.23045920052857616, + "grad_norm": 1.097594976425171, + "learning_rate": 9.49793272708457e-07, + "logits/chosen": -0.31783169507980347, + "logits/rejected": -0.3008044362068176, + "logps/chosen": -0.6933202147483826, + "logps/rejected": -0.7510000467300415, + "loss": 1.3498, + "rewards/accuracies": 0.5390625, + "rewards/chosen": -1.3866404294967651, + "rewards/margins": 0.1153596043586731, + "rewards/rejected": -1.502000093460083, + "step": 109 + }, + { + "epoch": 0.23257350512058145, + "grad_norm": 1.096330165863037, + "learning_rate": 9.481629371415313e-07, + "logits/chosen": -0.3582899570465088, + "logits/rejected": -0.3120020031929016, + "logps/chosen": -0.817268431186676, + "logps/rejected": -0.8862374424934387, + "loss": 1.3349, + "rewards/accuracies": 0.5625, + "rewards/chosen": -1.634536862373352, + "rewards/margins": 0.13793781399726868, + "rewards/rejected": -1.7724748849868774, + "step": 110 + }, + { + "epoch": 0.2346878097125867, + "grad_norm": 0.8261978626251221, + "learning_rate": 9.465079977862192e-07, + "logits/chosen": -0.41336673498153687, + "logits/rejected": -0.39544352889060974, + "logps/chosen": -0.7673372030258179, + "logps/rejected": -0.8331737518310547, + "loss": 1.3373, + "rewards/accuracies": 0.59375, + "rewards/chosen": -1.5346744060516357, + "rewards/margins": 0.1316729635000229, + "rewards/rejected": -1.6663475036621094, + "step": 111 + }, + { + "epoch": 0.236802114304592, + "grad_norm": 0.5922806262969971, + "learning_rate": 9.448285454973737e-07, + "logits/chosen": -0.3224758207798004, + "logits/rejected": -0.3118049204349518, + "logps/chosen": -0.7584627866744995, + "logps/rejected": -0.8859898447990417, + "loss": 1.2731, + "rewards/accuracies": 0.625, + "rewards/chosen": -1.516925573348999, + "rewards/margins": 0.2550540566444397, + "rewards/rejected": -1.7719796895980835, + "step": 112 + }, + { + "epoch": 0.2389164188965973, + "grad_norm": 1.3172541856765747, + "learning_rate": 9.431246724755877e-07, + "logits/chosen": -0.4287208318710327, + "logits/rejected": -0.3984590172767639, + "logps/chosen": -0.7587048411369324, + "logps/rejected": -0.7860502004623413, + "loss": 1.3832, + "rewards/accuracies": 0.4765625, + "rewards/chosen": -1.5174096822738647, + "rewards/margins": 0.05469079315662384, + "rewards/rejected": -1.5721004009246826, + "step": 113 + }, + { + "epoch": 0.24103072348860258, + "grad_norm": 0.7749882340431213, + "learning_rate": 9.413964722621337e-07, + "logits/chosen": -0.39085906744003296, + "logits/rejected": -0.3316206932067871, + "logps/chosen": -0.7035898566246033, + "logps/rejected": -0.7375759482383728, + "loss": 1.387, + "rewards/accuracies": 0.5546875, + "rewards/chosen": -1.4071797132492065, + "rewards/margins": 0.06797221302986145, + "rewards/rejected": -1.4751518964767456, + "step": 114 + }, + { + "epoch": 0.24314502808060787, + "grad_norm": 1.0914056301116943, + "learning_rate": 9.396440397338272e-07, + "logits/chosen": -0.38826486468315125, + "logits/rejected": -0.35520774126052856, + "logps/chosen": -0.7385872602462769, + "logps/rejected": -0.7974889278411865, + "loss": 1.3477, + "rewards/accuracies": 0.5, + "rewards/chosen": -1.4771745204925537, + "rewards/margins": 0.11780343949794769, + "rewards/rejected": -1.594977855682373, + "step": 115 + }, + { + "epoch": 0.24525933267261316, + "grad_norm": 1.2966018915176392, + "learning_rate": 9.378674710978183e-07, + "logits/chosen": -0.36493802070617676, + "logits/rejected": -0.34763696789741516, + "logps/chosen": -0.6731826663017273, + "logps/rejected": -0.7645149827003479, + "loss": 1.3, + "rewards/accuracies": 0.5859375, + "rewards/chosen": -1.3463653326034546, + "rewards/margins": 0.18266455829143524, + "rewards/rejected": -1.5290299654006958, + "step": 116 + }, + { + "epoch": 0.24737363726461845, + "grad_norm": 0.49401605129241943, + "learning_rate": 9.360668638863109e-07, + "logits/chosen": -0.40416795015335083, + "logits/rejected": -0.3815993070602417, + "logps/chosen": -0.719497799873352, + "logps/rejected": -0.7588324546813965, + "loss": 1.3621, + "rewards/accuracies": 0.5625, + "rewards/chosen": -1.438995599746704, + "rewards/margins": 0.07866920530796051, + "rewards/rejected": -1.517664909362793, + "step": 117 + }, + { + "epoch": 0.2494879418566237, + "grad_norm": 1.0603238344192505, + "learning_rate": 9.342423169512071e-07, + "logits/chosen": -0.3857055604457855, + "logits/rejected": -0.3524513244628906, + "logps/chosen": -0.7373769283294678, + "logps/rejected": -0.7971038818359375, + "loss": 1.3358, + "rewards/accuracies": 0.59375, + "rewards/chosen": -1.4747538566589355, + "rewards/margins": 0.11945393681526184, + "rewards/rejected": -1.594207763671875, + "step": 118 + }, + { + "epoch": 0.251602246448629, + "grad_norm": 0.9880490303039551, + "learning_rate": 9.323939304586804e-07, + "logits/chosen": -0.31455785036087036, + "logits/rejected": -0.3102484941482544, + "logps/chosen": -0.7276102900505066, + "logps/rejected": -0.7446941137313843, + "loss": 1.3928, + "rewards/accuracies": 0.546875, + "rewards/chosen": -1.4552205801010132, + "rewards/margins": 0.034167706966400146, + "rewards/rejected": -1.4893882274627686, + "step": 119 + }, + { + "epoch": 0.2537165510406343, + "grad_norm": 1.0419566631317139, + "learning_rate": 9.305218058836776e-07, + "logits/chosen": -0.38093918561935425, + "logits/rejected": -0.3588898181915283, + "logps/chosen": -0.715582013130188, + "logps/rejected": -0.8271002769470215, + "loss": 1.2934, + "rewards/accuracies": 0.6015625, + "rewards/chosen": -1.431164026260376, + "rewards/margins": 0.22303667664527893, + "rewards/rejected": -1.654200553894043, + "step": 120 + }, + { + "epoch": 0.2558308556326396, + "grad_norm": 0.657620370388031, + "learning_rate": 9.286260460043473e-07, + "logits/chosen": -0.45690783858299255, + "logits/rejected": -0.4082674980163574, + "logps/chosen": -0.6932571530342102, + "logps/rejected": -0.7631082534790039, + "loss": 1.3398, + "rewards/accuracies": 0.5546875, + "rewards/chosen": -1.3865143060684204, + "rewards/margins": 0.13970226049423218, + "rewards/rejected": -1.5262165069580078, + "step": 121 + }, + { + "epoch": 0.25794516022464486, + "grad_norm": 0.983686089515686, + "learning_rate": 9.267067548963974e-07, + "logits/chosen": -0.40266987681388855, + "logits/rejected": -0.37586671113967896, + "logps/chosen": -0.7362720966339111, + "logps/rejected": -0.7538987398147583, + "loss": 1.4066, + "rewards/accuracies": 0.5390625, + "rewards/chosen": -1.4725441932678223, + "rewards/margins": 0.03525342047214508, + "rewards/rejected": -1.5077974796295166, + "step": 122 + }, + { + "epoch": 0.26005946481665015, + "grad_norm": 1.0076361894607544, + "learning_rate": 9.24764037927381e-07, + "logits/chosen": -0.4461461007595062, + "logits/rejected": -0.40700826048851013, + "logps/chosen": -0.7206646800041199, + "logps/rejected": -0.7489192485809326, + "loss": 1.3759, + "rewards/accuracies": 0.5625, + "rewards/chosen": -1.4413293600082397, + "rewards/margins": 0.05650928616523743, + "rewards/rejected": -1.4978384971618652, + "step": 123 + }, + { + "epoch": 0.26217376940865544, + "grad_norm": 0.933315098285675, + "learning_rate": 9.22798001750913e-07, + "logits/chosen": -0.3966676890850067, + "logits/rejected": -0.3572196960449219, + "logps/chosen": -0.7075096368789673, + "logps/rejected": -0.7406759262084961, + "loss": 1.3667, + "rewards/accuracies": 0.6171875, + "rewards/chosen": -1.4150192737579346, + "rewards/margins": 0.06633266806602478, + "rewards/rejected": -1.4813518524169922, + "step": 124 + }, + { + "epoch": 0.26428807400066073, + "grad_norm": 0.6277392506599426, + "learning_rate": 9.20808754300814e-07, + "logits/chosen": -0.3555490970611572, + "logits/rejected": -0.35786163806915283, + "logps/chosen": -0.7549921274185181, + "logps/rejected": -0.832869291305542, + "loss": 1.3175, + "rewards/accuracies": 0.5546875, + "rewards/chosen": -1.5099842548370361, + "rewards/margins": 0.15575438737869263, + "rewards/rejected": -1.665738582611084, + "step": 125 + }, + { + "epoch": 0.266402378592666, + "grad_norm": 0.7172744274139404, + "learning_rate": 9.18796404785185e-07, + "logits/chosen": -0.41230690479278564, + "logits/rejected": -0.39935630559921265, + "logps/chosen": -0.7129833698272705, + "logps/rejected": -0.7888559103012085, + "loss": 1.3167, + "rewards/accuracies": 0.5625, + "rewards/chosen": -1.425966739654541, + "rewards/margins": 0.15174514055252075, + "rewards/rejected": -1.577711820602417, + "step": 126 + }, + { + "epoch": 0.2685166831846713, + "grad_norm": 1.2629508972167969, + "learning_rate": 9.16761063680412e-07, + "logits/chosen": -0.36754000186920166, + "logits/rejected": -0.3541562259197235, + "logps/chosen": -0.6992133855819702, + "logps/rejected": -0.7668892741203308, + "loss": 1.3735, + "rewards/accuracies": 0.578125, + "rewards/chosen": -1.3984267711639404, + "rewards/margins": 0.13535188138484955, + "rewards/rejected": -1.5337785482406616, + "step": 127 + }, + { + "epoch": 0.2706309877766766, + "grad_norm": 0.7024405598640442, + "learning_rate": 9.147028427251009e-07, + "logits/chosen": -0.4014585018157959, + "logits/rejected": -0.40560898184776306, + "logps/chosen": -0.727234959602356, + "logps/rejected": -0.8070081472396851, + "loss": 1.3138, + "rewards/accuracies": 0.625, + "rewards/chosen": -1.454469919204712, + "rewards/margins": 0.15954652428627014, + "rewards/rejected": -1.6140162944793701, + "step": 128 + }, + { + "epoch": 0.2727452923686819, + "grad_norm": 1.6173532009124756, + "learning_rate": 9.126218549139433e-07, + "logits/chosen": -0.32572367787361145, + "logits/rejected": -0.3470613956451416, + "logps/chosen": -0.7555541396141052, + "logps/rejected": -0.8856738209724426, + "loss": 1.2461, + "rewards/accuracies": 0.6796875, + "rewards/chosen": -1.5111082792282104, + "rewards/margins": 0.26023951172828674, + "rewards/rejected": -1.7713476419448853, + "step": 129 + }, + { + "epoch": 0.2748595969606871, + "grad_norm": 0.5878487229347229, + "learning_rate": 9.105182144915129e-07, + "logits/chosen": -0.39267170429229736, + "logits/rejected": -0.3448992967605591, + "logps/chosen": -0.6776289343833923, + "logps/rejected": -0.7530183792114258, + "loss": 1.3242, + "rewards/accuracies": 0.609375, + "rewards/chosen": -1.3552578687667847, + "rewards/margins": 0.15077897906303406, + "rewards/rejected": -1.5060367584228516, + "step": 130 + }, + { + "epoch": 0.2769739015526924, + "grad_norm": 0.43264809250831604, + "learning_rate": 9.08392036945994e-07, + "logits/chosen": -0.39980950951576233, + "logits/rejected": -0.4247930645942688, + "logps/chosen": -0.7898982167243958, + "logps/rejected": -0.8856299519538879, + "loss": 1.3004, + "rewards/accuracies": 0.59375, + "rewards/chosen": -1.5797964334487915, + "rewards/margins": 0.19146347045898438, + "rewards/rejected": -1.7712599039077759, + "step": 131 + }, + { + "epoch": 0.2790882061446977, + "grad_norm": 1.0348538160324097, + "learning_rate": 9.062434390028407e-07, + "logits/chosen": -0.35729700326919556, + "logits/rejected": -0.3265542984008789, + "logps/chosen": -0.7120587229728699, + "logps/rejected": -0.771691083908081, + "loss": 1.3374, + "rewards/accuracies": 0.59375, + "rewards/chosen": -1.4241174459457397, + "rewards/margins": 0.11926469206809998, + "rewards/rejected": -1.543382167816162, + "step": 132 + }, + { + "epoch": 0.281202510736703, + "grad_norm": 2.0902225971221924, + "learning_rate": 9.04072538618369e-07, + "logits/chosen": -0.4942469298839569, + "logits/rejected": -0.48699846863746643, + "logps/chosen": -0.7882512211799622, + "logps/rejected": -0.8270165920257568, + "loss": 1.3715, + "rewards/accuracies": 0.5703125, + "rewards/chosen": -1.5765024423599243, + "rewards/margins": 0.07753071188926697, + "rewards/rejected": -1.6540331840515137, + "step": 133 + }, + { + "epoch": 0.2833168153287083, + "grad_norm": 1.6436113119125366, + "learning_rate": 9.018794549732817e-07, + "logits/chosen": -0.41133156418800354, + "logits/rejected": -0.4146718382835388, + "logps/chosen": -0.779824435710907, + "logps/rejected": -0.9421006441116333, + "loss": 1.2521, + "rewards/accuracies": 0.625, + "rewards/chosen": -1.559648871421814, + "rewards/margins": 0.324552446603775, + "rewards/rejected": -1.8842012882232666, + "step": 134 + }, + { + "epoch": 0.28543111992071357, + "grad_norm": 0.8831859827041626, + "learning_rate": 8.996643084661244e-07, + "logits/chosen": -0.42452165484428406, + "logits/rejected": -0.3798604905605316, + "logps/chosen": -0.6499216556549072, + "logps/rejected": -0.7796702980995178, + "loss": 1.2581, + "rewards/accuracies": 0.625, + "rewards/chosen": -1.2998433113098145, + "rewards/margins": 0.25949734449386597, + "rewards/rejected": -1.5593405961990356, + "step": 135 + }, + { + "epoch": 0.28754542451271886, + "grad_norm": 0.8031218647956848, + "learning_rate": 8.974272207066767e-07, + "logits/chosen": -0.38131940364837646, + "logits/rejected": -0.3854255676269531, + "logps/chosen": -0.7026851773262024, + "logps/rejected": -0.762391209602356, + "loss": 1.3333, + "rewards/accuracies": 0.5625, + "rewards/chosen": -1.4053703546524048, + "rewards/margins": 0.11941206455230713, + "rewards/rejected": -1.524782419204712, + "step": 136 + }, + { + "epoch": 0.28965972910472415, + "grad_norm": 1.4455821514129639, + "learning_rate": 8.951683145092748e-07, + "logits/chosen": -0.42824965715408325, + "logits/rejected": -0.4320424795150757, + "logps/chosen": -0.7893270254135132, + "logps/rejected": -0.8517144322395325, + "loss": 1.3652, + "rewards/accuracies": 0.5234375, + "rewards/chosen": -1.5786540508270264, + "rewards/margins": 0.12477481365203857, + "rewards/rejected": -1.703428864479065, + "step": 137 + }, + { + "epoch": 0.29177403369672944, + "grad_norm": 0.6299450397491455, + "learning_rate": 8.928877138860706e-07, + "logits/chosen": -0.4388589560985565, + "logits/rejected": -0.40156903862953186, + "logps/chosen": -0.7346572875976562, + "logps/rejected": -0.8166492581367493, + "loss": 1.3134, + "rewards/accuracies": 0.5703125, + "rewards/chosen": -1.4693145751953125, + "rewards/margins": 0.16398391127586365, + "rewards/rejected": -1.6332985162734985, + "step": 138 + }, + { + "epoch": 0.29388833828873473, + "grad_norm": 2.784437417984009, + "learning_rate": 8.905855440402224e-07, + "logits/chosen": -0.405662477016449, + "logits/rejected": -0.35549795627593994, + "logps/chosen": -0.7482771277427673, + "logps/rejected": -0.795568585395813, + "loss": 1.3656, + "rewards/accuracies": 0.578125, + "rewards/chosen": -1.4965542554855347, + "rewards/margins": 0.09458285570144653, + "rewards/rejected": -1.591137170791626, + "step": 139 + }, + { + "epoch": 0.29600264288074, + "grad_norm": 0.4958692193031311, + "learning_rate": 8.882619313590212e-07, + "logits/chosen": -0.3814452886581421, + "logits/rejected": -0.35715553164482117, + "logps/chosen": -0.7731542587280273, + "logps/rejected": -0.8285202980041504, + "loss": 1.3776, + "rewards/accuracies": 0.515625, + "rewards/chosen": -1.5463085174560547, + "rewards/margins": 0.11073210835456848, + "rewards/rejected": -1.6570405960083008, + "step": 140 + }, + { + "epoch": 0.2981169474727453, + "grad_norm": 0.4597362279891968, + "learning_rate": 8.859170034069532e-07, + "logits/chosen": -0.388383150100708, + "logits/rejected": -0.4071737229824066, + "logps/chosen": -0.7263504266738892, + "logps/rejected": -0.769676148891449, + "loss": 1.3712, + "rewards/accuracies": 0.53125, + "rewards/chosen": -1.4527008533477783, + "rewards/margins": 0.08665145933628082, + "rewards/rejected": -1.539352297782898, + "step": 141 + }, + { + "epoch": 0.3002312520647506, + "grad_norm": 0.4914930760860443, + "learning_rate": 8.835508889186956e-07, + "logits/chosen": -0.41084378957748413, + "logits/rejected": -0.3823031187057495, + "logps/chosen": -0.7565821409225464, + "logps/rejected": -0.9084322452545166, + "loss": 1.2717, + "rewards/accuracies": 0.578125, + "rewards/chosen": -1.5131642818450928, + "rewards/margins": 0.3037002384662628, + "rewards/rejected": -1.8168644905090332, + "step": 142 + }, + { + "epoch": 0.3023455566567559, + "grad_norm": 2.0075581073760986, + "learning_rate": 8.811637177920499e-07, + "logits/chosen": -0.4438302516937256, + "logits/rejected": -0.4916025698184967, + "logps/chosen": -0.800719141960144, + "logps/rejected": -0.8658267855644226, + "loss": 1.358, + "rewards/accuracies": 0.515625, + "rewards/chosen": -1.601438283920288, + "rewards/margins": 0.1302153617143631, + "rewards/rejected": -1.7316535711288452, + "step": 143 + }, + { + "epoch": 0.3044598612487612, + "grad_norm": 1.1243022680282593, + "learning_rate": 8.7875562108081e-07, + "logits/chosen": -0.40519949793815613, + "logits/rejected": -0.3905750811100006, + "logps/chosen": -0.689585268497467, + "logps/rejected": -0.7312421798706055, + "loss": 1.3503, + "rewards/accuracies": 0.5703125, + "rewards/chosen": -1.379170536994934, + "rewards/margins": 0.08331384509801865, + "rewards/rejected": -1.462484359741211, + "step": 144 + }, + { + "epoch": 0.3065741658407664, + "grad_norm": 0.7543137669563293, + "learning_rate": 8.76326730987568e-07, + "logits/chosen": -0.4696752727031708, + "logits/rejected": -0.4357326626777649, + "logps/chosen": -0.7813425660133362, + "logps/rejected": -0.8276973962783813, + "loss": 1.3794, + "rewards/accuracies": 0.5234375, + "rewards/chosen": -1.5626851320266724, + "rewards/margins": 0.09270970523357391, + "rewards/rejected": -1.6553947925567627, + "step": 145 + }, + { + "epoch": 0.3086884704327717, + "grad_norm": 1.3136053085327148, + "learning_rate": 8.738771808564555e-07, + "logits/chosen": -0.4262731075286865, + "logits/rejected": -0.44038820266723633, + "logps/chosen": -0.697494387626648, + "logps/rejected": -0.8369535803794861, + "loss": 1.2699, + "rewards/accuracies": 0.609375, + "rewards/chosen": -1.394988775253296, + "rewards/margins": 0.2789183557033539, + "rewards/rejected": -1.6739071607589722, + "step": 146 + }, + { + "epoch": 0.310802775024777, + "grad_norm": 2.221562385559082, + "learning_rate": 8.714071051658245e-07, + "logits/chosen": -0.40089336037635803, + "logits/rejected": -0.37991875410079956, + "logps/chosen": -0.7704445123672485, + "logps/rejected": -0.859091579914093, + "loss": 1.2987, + "rewards/accuracies": 0.5859375, + "rewards/chosen": -1.540889024734497, + "rewards/margins": 0.17729414999485016, + "rewards/rejected": -1.718183159828186, + "step": 147 + }, + { + "epoch": 0.3129170796167823, + "grad_norm": 1.5049912929534912, + "learning_rate": 8.689166395208636e-07, + "logits/chosen": -0.38984015583992004, + "logits/rejected": -0.35900723934173584, + "logps/chosen": -0.6424779891967773, + "logps/rejected": -0.7145389318466187, + "loss": 1.3261, + "rewards/accuracies": 0.5703125, + "rewards/chosen": -1.2849559783935547, + "rewards/margins": 0.14412200450897217, + "rewards/rejected": -1.4290778636932373, + "step": 148 + }, + { + "epoch": 0.31503138420878757, + "grad_norm": 0.36125388741493225, + "learning_rate": 8.664059206461534e-07, + "logits/chosen": -0.3490441143512726, + "logits/rejected": -0.3219914436340332, + "logps/chosen": -0.7200264930725098, + "logps/rejected": -0.7924249768257141, + "loss": 1.3476, + "rewards/accuracies": 0.5625, + "rewards/chosen": -1.4400529861450195, + "rewards/margins": 0.1447969526052475, + "rewards/rejected": -1.5848499536514282, + "step": 149 + }, + { + "epoch": 0.31714568880079286, + "grad_norm": 1.039840579032898, + "learning_rate": 8.638750863781612e-07, + "logits/chosen": -0.40701645612716675, + "logits/rejected": -0.406186580657959, + "logps/chosen": -0.7083575129508972, + "logps/rejected": -0.7766748070716858, + "loss": 1.3263, + "rewards/accuracies": 0.5390625, + "rewards/chosen": -1.4167150259017944, + "rewards/margins": 0.1366347074508667, + "rewards/rejected": -1.5533496141433716, + "step": 150 + }, + { + "epoch": 0.31925999339279815, + "grad_norm": 0.7128564119338989, + "learning_rate": 8.613242756576728e-07, + "logits/chosen": -0.40932926535606384, + "logits/rejected": -0.4234562814235687, + "logps/chosen": -0.6775843501091003, + "logps/rejected": -0.7866222858428955, + "loss": 1.2834, + "rewards/accuracies": 0.6015625, + "rewards/chosen": -1.3551687002182007, + "rewards/margins": 0.2180757373571396, + "rewards/rejected": -1.573244571685791, + "step": 151 + }, + { + "epoch": 0.32137429798480344, + "grad_norm": 1.1701059341430664, + "learning_rate": 8.587536285221655e-07, + "logits/chosen": -0.3654797077178955, + "logits/rejected": -0.3181680738925934, + "logps/chosen": -0.6686022877693176, + "logps/rejected": -0.7058504223823547, + "loss": 1.3612, + "rewards/accuracies": 0.546875, + "rewards/chosen": -1.3372045755386353, + "rewards/margins": 0.07449636608362198, + "rewards/rejected": -1.4117008447647095, + "step": 152 + }, + { + "epoch": 0.3234886025768087, + "grad_norm": 0.8239700794219971, + "learning_rate": 8.561632860981204e-07, + "logits/chosen": -0.42527130246162415, + "logits/rejected": -0.4091627299785614, + "logps/chosen": -0.6969794631004333, + "logps/rejected": -0.8019355535507202, + "loss": 1.2974, + "rewards/accuracies": 0.578125, + "rewards/chosen": -1.3939589262008667, + "rewards/margins": 0.20991206169128418, + "rewards/rejected": -1.6038711071014404, + "step": 153 + }, + { + "epoch": 0.325602907168814, + "grad_norm": 1.4885636568069458, + "learning_rate": 8.535533905932737e-07, + "logits/chosen": -0.4126192331314087, + "logits/rejected": -0.41548141837120056, + "logps/chosen": -0.7076549530029297, + "logps/rejected": -0.7940821051597595, + "loss": 1.3198, + "rewards/accuracies": 0.625, + "rewards/chosen": -1.4153099060058594, + "rewards/margins": 0.17285437881946564, + "rewards/rejected": -1.588164210319519, + "step": 154 + }, + { + "epoch": 0.3277172117608193, + "grad_norm": 1.439434289932251, + "learning_rate": 8.509240852888106e-07, + "logits/chosen": -0.3763914704322815, + "logits/rejected": -0.3617165684700012, + "logps/chosen": -0.7189474105834961, + "logps/rejected": -0.827629804611206, + "loss": 1.2816, + "rewards/accuracies": 0.625, + "rewards/chosen": -1.4378948211669922, + "rewards/margins": 0.2173648476600647, + "rewards/rejected": -1.655259609222412, + "step": 155 + }, + { + "epoch": 0.3298315163528246, + "grad_norm": 1.4505418539047241, + "learning_rate": 8.482755145314985e-07, + "logits/chosen": -0.37879478931427, + "logits/rejected": -0.38689684867858887, + "logps/chosen": -0.7011865973472595, + "logps/rejected": -0.8019431829452515, + "loss": 1.3158, + "rewards/accuracies": 0.5546875, + "rewards/chosen": -1.402373194694519, + "rewards/margins": 0.2015131413936615, + "rewards/rejected": -1.603886365890503, + "step": 156 + }, + { + "epoch": 0.3319458209448299, + "grad_norm": 2.0968713760375977, + "learning_rate": 8.45607823725763e-07, + "logits/chosen": -0.4366365075111389, + "logits/rejected": -0.41210681200027466, + "logps/chosen": -0.6455651521682739, + "logps/rejected": -0.7228428721427917, + "loss": 1.3247, + "rewards/accuracies": 0.5546875, + "rewards/chosen": -1.2911303043365479, + "rewards/margins": 0.1545555144548416, + "rewards/rejected": -1.4456857442855835, + "step": 157 + }, + { + "epoch": 0.3340601255368352, + "grad_norm": 0.6716106534004211, + "learning_rate": 8.429211593257052e-07, + "logits/chosen": -0.42992207407951355, + "logits/rejected": -0.4105672836303711, + "logps/chosen": -0.6981461048126221, + "logps/rejected": -0.7909567952156067, + "loss": 1.3128, + "rewards/accuracies": 0.5859375, + "rewards/chosen": -1.3962922096252441, + "rewards/margins": 0.1856214702129364, + "rewards/rejected": -1.5819135904312134, + "step": 158 + }, + { + "epoch": 0.33617443012884046, + "grad_norm": 2.4430501461029053, + "learning_rate": 8.402156688270612e-07, + "logits/chosen": -0.4184916317462921, + "logits/rejected": -0.3943992257118225, + "logps/chosen": -0.6568948030471802, + "logps/rejected": -0.7506390810012817, + "loss": 1.2992, + "rewards/accuracies": 0.5859375, + "rewards/chosen": -1.3137896060943604, + "rewards/margins": 0.18748846650123596, + "rewards/rejected": -1.5012781620025635, + "step": 159 + }, + { + "epoch": 0.3382887347208457, + "grad_norm": 2.0322091579437256, + "learning_rate": 8.374915007591052e-07, + "logits/chosen": -0.4713057577610016, + "logits/rejected": -0.42163771390914917, + "logps/chosen": -0.7347853779792786, + "logps/rejected": -0.7770044207572937, + "loss": 1.3801, + "rewards/accuracies": 0.5078125, + "rewards/chosen": -1.4695707559585571, + "rewards/margins": 0.0844380110502243, + "rewards/rejected": -1.5540088415145874, + "step": 160 + }, + { + "epoch": 0.340403039312851, + "grad_norm": 0.4045500159263611, + "learning_rate": 8.347488046764948e-07, + "logits/chosen": -0.39465126395225525, + "logits/rejected": -0.3961923122406006, + "logps/chosen": -0.601732075214386, + "logps/rejected": -0.694148600101471, + "loss": 1.2859, + "rewards/accuracies": 0.6328125, + "rewards/chosen": -1.203464150428772, + "rewards/margins": 0.18483319878578186, + "rewards/rejected": -1.388297200202942, + "step": 161 + }, + { + "epoch": 0.3425173439048563, + "grad_norm": 2.79396915435791, + "learning_rate": 8.319877311510612e-07, + "logits/chosen": -0.4311378002166748, + "logits/rejected": -0.4248836636543274, + "logps/chosen": -0.6813413500785828, + "logps/rejected": -0.775830864906311, + "loss": 1.3001, + "rewards/accuracies": 0.578125, + "rewards/chosen": -1.3626827001571655, + "rewards/margins": 0.18897925317287445, + "rewards/rejected": -1.551661729812622, + "step": 162 + }, + { + "epoch": 0.34463164849686156, + "grad_norm": 0.714146077632904, + "learning_rate": 8.292084317635419e-07, + "logits/chosen": -0.4060715436935425, + "logits/rejected": -0.3770482540130615, + "logps/chosen": -0.7176523208618164, + "logps/rejected": -0.7973593473434448, + "loss": 1.324, + "rewards/accuracies": 0.5859375, + "rewards/chosen": -1.4353046417236328, + "rewards/margins": 0.15941408276557922, + "rewards/rejected": -1.5947186946868896, + "step": 163 + }, + { + "epoch": 0.34674595308886685, + "grad_norm": 1.6007037162780762, + "learning_rate": 8.264110590952607e-07, + "logits/chosen": -0.49063974618911743, + "logits/rejected": -0.5119628310203552, + "logps/chosen": -0.7263911366462708, + "logps/rejected": -0.9138184785842896, + "loss": 1.2439, + "rewards/accuracies": 0.6484375, + "rewards/chosen": -1.4527822732925415, + "rewards/margins": 0.3748546540737152, + "rewards/rejected": -1.827636957168579, + "step": 164 + }, + { + "epoch": 0.34886025768087214, + "grad_norm": 1.4566830396652222, + "learning_rate": 8.235957667197494e-07, + "logits/chosen": -0.4681779146194458, + "logits/rejected": -0.46475380659103394, + "logps/chosen": -0.6923782229423523, + "logps/rejected": -0.7901281118392944, + "loss": 1.295, + "rewards/accuracies": 0.5703125, + "rewards/chosen": -1.3847564458847046, + "rewards/margins": 0.19549959897994995, + "rewards/rejected": -1.5802562236785889, + "step": 165 + }, + { + "epoch": 0.35097456227287743, + "grad_norm": 3.0825328826904297, + "learning_rate": 8.207627091943177e-07, + "logits/chosen": -0.4294862151145935, + "logits/rejected": -0.42411237955093384, + "logps/chosen": -0.6851246356964111, + "logps/rejected": -0.7844961881637573, + "loss": 1.2871, + "rewards/accuracies": 0.625, + "rewards/chosen": -1.3702492713928223, + "rewards/margins": 0.19874317944049835, + "rewards/rejected": -1.5689923763275146, + "step": 166 + }, + { + "epoch": 0.3530888668648827, + "grad_norm": 1.0783339738845825, + "learning_rate": 8.179120420515675e-07, + "logits/chosen": -0.4528030455112457, + "logits/rejected": -0.4626815617084503, + "logps/chosen": -0.703376293182373, + "logps/rejected": -0.8752757906913757, + "loss": 1.2193, + "rewards/accuracies": 0.703125, + "rewards/chosen": -1.406752586364746, + "rewards/margins": 0.34379899501800537, + "rewards/rejected": -1.7505515813827515, + "step": 167 + }, + { + "epoch": 0.355203171456888, + "grad_norm": 2.6788036823272705, + "learning_rate": 8.150439217908556e-07, + "logits/chosen": -0.44946759939193726, + "logits/rejected": -0.47430264949798584, + "logps/chosen": -0.751136839389801, + "logps/rejected": -0.874577522277832, + "loss": 1.29, + "rewards/accuracies": 0.578125, + "rewards/chosen": -1.502273678779602, + "rewards/margins": 0.24688144028186798, + "rewards/rejected": -1.749155044555664, + "step": 168 + }, + { + "epoch": 0.3573174760488933, + "grad_norm": 0.9087730050086975, + "learning_rate": 8.121585058696999e-07, + "logits/chosen": -0.47294262051582336, + "logits/rejected": -0.46765226125717163, + "logps/chosen": -0.7291173934936523, + "logps/rejected": -0.7999277114868164, + "loss": 1.3482, + "rewards/accuracies": 0.6015625, + "rewards/chosen": -1.4582347869873047, + "rewards/margins": 0.1416206806898117, + "rewards/rejected": -1.5998554229736328, + "step": 169 + }, + { + "epoch": 0.3594317806408986, + "grad_norm": 3.392674207687378, + "learning_rate": 8.092559526951374e-07, + "logits/chosen": -0.5026620626449585, + "logits/rejected": -0.46620574593544006, + "logps/chosen": -0.746992290019989, + "logps/rejected": -0.8266301155090332, + "loss": 1.3202, + "rewards/accuracies": 0.6328125, + "rewards/chosen": -1.493984580039978, + "rewards/margins": 0.15927578508853912, + "rewards/rejected": -1.6532602310180664, + "step": 170 + }, + { + "epoch": 0.3615460852329039, + "grad_norm": 1.27628755569458, + "learning_rate": 8.063364216150256e-07, + "logits/chosen": -0.5211395025253296, + "logits/rejected": -0.5419963598251343, + "logps/chosen": -0.7919114828109741, + "logps/rejected": -0.8731362223625183, + "loss": 1.3228, + "rewards/accuracies": 0.5703125, + "rewards/chosen": -1.5838229656219482, + "rewards/margins": 0.16244953870773315, + "rewards/rejected": -1.7462724447250366, + "step": 171 + }, + { + "epoch": 0.36366038982490917, + "grad_norm": 0.8269656896591187, + "learning_rate": 8.034000729092967e-07, + "logits/chosen": -0.49545183777809143, + "logits/rejected": -0.4716613292694092, + "logps/chosen": -0.719520092010498, + "logps/rejected": -0.7876347303390503, + "loss": 1.3367, + "rewards/accuracies": 0.5625, + "rewards/chosen": -1.439040184020996, + "rewards/margins": 0.13622930645942688, + "rewards/rejected": -1.5752694606781006, + "step": 172 + }, + { + "epoch": 0.36577469441691446, + "grad_norm": 0.6049383282661438, + "learning_rate": 8.004470677811559e-07, + "logits/chosen": -0.45276379585266113, + "logits/rejected": -0.42617955803871155, + "logps/chosen": -0.7097947597503662, + "logps/rejected": -0.7606989145278931, + "loss": 1.3909, + "rewards/accuracies": 0.5703125, + "rewards/chosen": -1.4195895195007324, + "rewards/margins": 0.10180822014808655, + "rewards/rejected": -1.5213978290557861, + "step": 173 + }, + { + "epoch": 0.36788899900891975, + "grad_norm": 3.980013847351074, + "learning_rate": 7.974775683482337e-07, + "logits/chosen": -0.4783569574356079, + "logits/rejected": -0.43521156907081604, + "logps/chosen": -0.7623491287231445, + "logps/rejected": -0.8719285130500793, + "loss": 1.2838, + "rewards/accuracies": 0.5859375, + "rewards/chosen": -1.524698257446289, + "rewards/margins": 0.2191585898399353, + "rewards/rejected": -1.7438570261001587, + "step": 174 + }, + { + "epoch": 0.370003303600925, + "grad_norm": 1.024530053138733, + "learning_rate": 7.94491737633684e-07, + "logits/chosen": -0.5009916424751282, + "logits/rejected": -0.48874592781066895, + "logps/chosen": -0.7552992701530457, + "logps/rejected": -0.8485872745513916, + "loss": 1.3153, + "rewards/accuracies": 0.6015625, + "rewards/chosen": -1.5105985403060913, + "rewards/margins": 0.18657605350017548, + "rewards/rejected": -1.6971745491027832, + "step": 175 + }, + { + "epoch": 0.37211760819293027, + "grad_norm": 1.5952919721603394, + "learning_rate": 7.91489739557236e-07, + "logits/chosen": -0.4424138069152832, + "logits/rejected": -0.4334307312965393, + "logps/chosen": -0.6956002116203308, + "logps/rejected": -0.8018803000450134, + "loss": 1.3011, + "rewards/accuracies": 0.59375, + "rewards/chosen": -1.3912004232406616, + "rewards/margins": 0.21256020665168762, + "rewards/rejected": -1.6037606000900269, + "step": 176 + }, + { + "epoch": 0.37423191278493556, + "grad_norm": 1.8331164121627808, + "learning_rate": 7.884717389261934e-07, + "logits/chosen": -0.4836267828941345, + "logits/rejected": -0.5018677115440369, + "logps/chosen": -0.7895969152450562, + "logps/rejected": -0.927432656288147, + "loss": 1.2467, + "rewards/accuracies": 0.625, + "rewards/chosen": -1.5791938304901123, + "rewards/margins": 0.27567166090011597, + "rewards/rejected": -1.854865312576294, + "step": 177 + }, + { + "epoch": 0.37634621737694085, + "grad_norm": 2.165984869003296, + "learning_rate": 7.854379014263876e-07, + "logits/chosen": -0.46125832200050354, + "logits/rejected": -0.39802712202072144, + "logps/chosen": -0.8382925391197205, + "logps/rejected": -0.9422982931137085, + "loss": 1.339, + "rewards/accuracies": 0.5546875, + "rewards/chosen": -1.676585078239441, + "rewards/margins": 0.20801125466823578, + "rewards/rejected": -1.884596586227417, + "step": 178 + }, + { + "epoch": 0.37846052196894614, + "grad_norm": 0.522197425365448, + "learning_rate": 7.823883936130817e-07, + "logits/chosen": -0.4747823476791382, + "logits/rejected": -0.4888593554496765, + "logps/chosen": -0.723059892654419, + "logps/rejected": -0.84626305103302, + "loss": 1.2708, + "rewards/accuracies": 0.6171875, + "rewards/chosen": -1.446119785308838, + "rewards/margins": 0.24640652537345886, + "rewards/rejected": -1.69252610206604, + "step": 179 + }, + { + "epoch": 0.38057482656095143, + "grad_norm": 1.9690748453140259, + "learning_rate": 7.793233829018262e-07, + "logits/chosen": -0.5430271625518799, + "logits/rejected": -0.5403288006782532, + "logps/chosen": -0.8244275450706482, + "logps/rejected": -0.9133931994438171, + "loss": 1.3306, + "rewards/accuracies": 0.5, + "rewards/chosen": -1.6488550901412964, + "rewards/margins": 0.17793115973472595, + "rewards/rejected": -1.8267863988876343, + "step": 180 + }, + { + "epoch": 0.3826891311529567, + "grad_norm": 2.9181363582611084, + "learning_rate": 7.762430375592688e-07, + "logits/chosen": -0.4843495786190033, + "logits/rejected": -0.47929176688194275, + "logps/chosen": -0.8097372055053711, + "logps/rejected": -0.8973760008811951, + "loss": 1.3283, + "rewards/accuracies": 0.5625, + "rewards/chosen": -1.6194744110107422, + "rewards/margins": 0.17527759075164795, + "rewards/rejected": -1.7947520017623901, + "step": 181 + }, + { + "epoch": 0.384803435744962, + "grad_norm": 4.227083683013916, + "learning_rate": 7.731475266939158e-07, + "logits/chosen": -0.5047686696052551, + "logits/rejected": -0.4921850264072418, + "logps/chosen": -0.875984787940979, + "logps/rejected": -1.0406755208969116, + "loss": 1.3169, + "rewards/accuracies": 0.5703125, + "rewards/chosen": -1.751969575881958, + "rewards/margins": 0.32938146591186523, + "rewards/rejected": -2.0813510417938232, + "step": 182 + }, + { + "epoch": 0.3869177403369673, + "grad_norm": 1.2871490716934204, + "learning_rate": 7.700370202468489e-07, + "logits/chosen": -0.5123783349990845, + "logits/rejected": -0.55179762840271, + "logps/chosen": -0.8869211077690125, + "logps/rejected": -1.1082773208618164, + "loss": 1.216, + "rewards/accuracies": 0.640625, + "rewards/chosen": -1.773842215538025, + "rewards/margins": 0.4427123963832855, + "rewards/rejected": -2.216554641723633, + "step": 183 + }, + { + "epoch": 0.3890320449289726, + "grad_norm": 1.3015679121017456, + "learning_rate": 7.669116889823954e-07, + "logits/chosen": -0.49182361364364624, + "logits/rejected": -0.5180585384368896, + "logps/chosen": -0.8816227912902832, + "logps/rejected": -0.9516821503639221, + "loss": 1.3449, + "rewards/accuracies": 0.5078125, + "rewards/chosen": -1.7632455825805664, + "rewards/margins": 0.14011862874031067, + "rewards/rejected": -1.9033643007278442, + "step": 184 + }, + { + "epoch": 0.3911463495209779, + "grad_norm": 4.280956268310547, + "learning_rate": 7.637717044787526e-07, + "logits/chosen": -0.5702117681503296, + "logits/rejected": -0.5475804209709167, + "logps/chosen": -0.9307697415351868, + "logps/rejected": -1.0322346687316895, + "loss": 1.3434, + "rewards/accuracies": 0.5546875, + "rewards/chosen": -1.8615394830703735, + "rewards/margins": 0.20292985439300537, + "rewards/rejected": -2.064469337463379, + "step": 185 + }, + { + "epoch": 0.39326065411298317, + "grad_norm": 1.3511455059051514, + "learning_rate": 7.606172391185699e-07, + "logits/chosen": -0.5466108322143555, + "logits/rejected": -0.551085352897644, + "logps/chosen": -1.0657893419265747, + "logps/rejected": -1.15786612033844, + "loss": 1.3549, + "rewards/accuracies": 0.4609375, + "rewards/chosen": -2.1315786838531494, + "rewards/margins": 0.18415334820747375, + "rewards/rejected": -2.31573224067688, + "step": 186 + }, + { + "epoch": 0.39537495870498846, + "grad_norm": 0.7001176476478577, + "learning_rate": 7.574484660794836e-07, + "logits/chosen": -0.4849010407924652, + "logits/rejected": -0.5057946443557739, + "logps/chosen": -1.0784757137298584, + "logps/rejected": -1.2035218477249146, + "loss": 1.3556, + "rewards/accuracies": 0.4921875, + "rewards/chosen": -2.156951427459717, + "rewards/margins": 0.25009211897850037, + "rewards/rejected": -2.407043695449829, + "step": 187 + }, + { + "epoch": 0.39748926329699374, + "grad_norm": 3.1405649185180664, + "learning_rate": 7.542655593246103e-07, + "logits/chosen": -0.5316596031188965, + "logits/rejected": -0.5658366680145264, + "logps/chosen": -1.0630009174346924, + "logps/rejected": -1.2867177724838257, + "loss": 1.2612, + "rewards/accuracies": 0.5703125, + "rewards/chosen": -2.1260018348693848, + "rewards/margins": 0.447433739900589, + "rewards/rejected": -2.5734355449676514, + "step": 188 + }, + { + "epoch": 0.39960356788899903, + "grad_norm": 2.142986297607422, + "learning_rate": 7.510686935929962e-07, + "logits/chosen": -0.5959028005599976, + "logits/rejected": -0.5836039781570435, + "logps/chosen": -1.111003041267395, + "logps/rejected": -1.1858208179473877, + "loss": 1.3958, + "rewards/accuracies": 0.5390625, + "rewards/chosen": -2.22200608253479, + "rewards/margins": 0.149635449051857, + "rewards/rejected": -2.3716416358947754, + "step": 189 + }, + { + "epoch": 0.40171787248100427, + "grad_norm": 1.9227335453033447, + "learning_rate": 7.478580443900246e-07, + "logits/chosen": -0.607532799243927, + "logits/rejected": -0.6102017760276794, + "logps/chosen": -1.3353261947631836, + "logps/rejected": -1.3975369930267334, + "loss": 1.457, + "rewards/accuracies": 0.5078125, + "rewards/chosen": -2.670652389526367, + "rewards/margins": 0.12442154437303543, + "rewards/rejected": -2.795073986053467, + "step": 190 + }, + { + "epoch": 0.40383217707300956, + "grad_norm": 0.8509105443954468, + "learning_rate": 7.446337879777802e-07, + "logits/chosen": -0.5903070569038391, + "logits/rejected": -0.5728173851966858, + "logps/chosen": -1.27094566822052, + "logps/rejected": -1.3024815320968628, + "loss": 1.4953, + "rewards/accuracies": 0.5, + "rewards/chosen": -2.54189133644104, + "rewards/margins": 0.06307169049978256, + "rewards/rejected": -2.6049630641937256, + "step": 191 + }, + { + "epoch": 0.40594648166501485, + "grad_norm": 1.1561088562011719, + "learning_rate": 7.413961013653725e-07, + "logits/chosen": -0.5578102469444275, + "logits/rejected": -0.5907329320907593, + "logps/chosen": -1.3817013502120972, + "logps/rejected": -1.419295072555542, + "loss": 1.4865, + "rewards/accuracies": 0.515625, + "rewards/chosen": -2.7634027004241943, + "rewards/margins": 0.07518734782934189, + "rewards/rejected": -2.838590145111084, + "step": 192 + }, + { + "epoch": 0.40806078625702014, + "grad_norm": 8.165387153625488, + "learning_rate": 7.381451622992183e-07, + "logits/chosen": -0.5213198661804199, + "logits/rejected": -0.5392848253250122, + "logps/chosen": -1.1798306703567505, + "logps/rejected": -1.2692899703979492, + "loss": 1.3971, + "rewards/accuracies": 0.53125, + "rewards/chosen": -2.359661340713501, + "rewards/margins": 0.17891867458820343, + "rewards/rejected": -2.5385799407958984, + "step": 193 + }, + { + "epoch": 0.4101750908490254, + "grad_norm": 1.2850884199142456, + "learning_rate": 7.348811492532839e-07, + "logits/chosen": -0.5382787585258484, + "logits/rejected": -0.5274642705917358, + "logps/chosen": -1.242587685585022, + "logps/rejected": -1.272438645362854, + "loss": 1.4795, + "rewards/accuracies": 0.4921875, + "rewards/chosen": -2.485175371170044, + "rewards/margins": 0.05970197170972824, + "rewards/rejected": -2.544877290725708, + "step": 194 + }, + { + "epoch": 0.4122893954410307, + "grad_norm": 4.910929203033447, + "learning_rate": 7.316042414192864e-07, + "logits/chosen": -0.6186666488647461, + "logits/rejected": -0.6255884170532227, + "logps/chosen": -1.1743704080581665, + "logps/rejected": -1.2720146179199219, + "loss": 1.4127, + "rewards/accuracies": 0.546875, + "rewards/chosen": -2.348740816116333, + "rewards/margins": 0.19528816640377045, + "rewards/rejected": -2.5440292358398438, + "step": 195 + }, + { + "epoch": 0.414403700033036, + "grad_norm": 4.270901203155518, + "learning_rate": 7.283146186968565e-07, + "logits/chosen": -0.5861366987228394, + "logits/rejected": -0.6005197763442993, + "logps/chosen": -1.2127022743225098, + "logps/rejected": -1.3036490678787231, + "loss": 1.4067, + "rewards/accuracies": 0.546875, + "rewards/chosen": -2.4254045486450195, + "rewards/margins": 0.18189355731010437, + "rewards/rejected": -2.6072981357574463, + "step": 196 + }, + { + "epoch": 0.4165180046250413, + "grad_norm": 0.3070116639137268, + "learning_rate": 7.250124616836622e-07, + "logits/chosen": -0.6026022434234619, + "logits/rejected": -0.5920048952102661, + "logps/chosen": -1.0706496238708496, + "logps/rejected": -1.2879594564437866, + "loss": 1.2465, + "rewards/accuracies": 0.6484375, + "rewards/chosen": -2.141299247741699, + "rewards/margins": 0.4346192479133606, + "rewards/rejected": -2.5759189128875732, + "step": 197 + }, + { + "epoch": 0.4186323092170466, + "grad_norm": 1.160252571105957, + "learning_rate": 7.216979516654943e-07, + "logits/chosen": -0.5808722376823425, + "logits/rejected": -0.5770124197006226, + "logps/chosen": -1.0426011085510254, + "logps/rejected": -1.1295092105865479, + "loss": 1.4244, + "rewards/accuracies": 0.53125, + "rewards/chosen": -2.085202217102051, + "rewards/margins": 0.1738162338733673, + "rewards/rejected": -2.2590184211730957, + "step": 198 + }, + { + "epoch": 0.4207466138090519, + "grad_norm": 4.6966471672058105, + "learning_rate": 7.183712706063132e-07, + "logits/chosen": -0.5958350896835327, + "logits/rejected": -0.6440161466598511, + "logps/chosen": -0.981076717376709, + "logps/rejected": -1.1257147789001465, + "loss": 1.3175, + "rewards/accuracies": 0.6015625, + "rewards/chosen": -1.962153434753418, + "rewards/margins": 0.28927627205848694, + "rewards/rejected": -2.251429557800293, + "step": 199 + }, + { + "epoch": 0.42286091840105716, + "grad_norm": 2.9395248889923096, + "learning_rate": 7.150326011382603e-07, + "logits/chosen": -0.5647889375686646, + "logits/rejected": -0.5762943625450134, + "logps/chosen": -0.8101261854171753, + "logps/rejected": -1.0001438856124878, + "loss": 1.2135, + "rewards/accuracies": 0.6875, + "rewards/chosen": -1.6202523708343506, + "rewards/margins": 0.38003528118133545, + "rewards/rejected": -2.0002877712249756, + "step": 200 + }, + { + "epoch": 0.42497522299306245, + "grad_norm": 1.2575147151947021, + "learning_rate": 7.116821265516306e-07, + "logits/chosen": -0.5834293961524963, + "logits/rejected": -0.5929508805274963, + "logps/chosen": -0.8768399953842163, + "logps/rejected": -1.0942046642303467, + "loss": 1.219, + "rewards/accuracies": 0.6171875, + "rewards/chosen": -1.7536799907684326, + "rewards/margins": 0.43472927808761597, + "rewards/rejected": -2.1884093284606934, + "step": 201 + }, + { + "epoch": 0.42708952758506774, + "grad_norm": 1.4035751819610596, + "learning_rate": 7.083200307848115e-07, + "logits/chosen": -0.5424078106880188, + "logits/rejected": -0.5316082239151001, + "logps/chosen": -0.8791903257369995, + "logps/rejected": -0.9323580265045166, + "loss": 1.3675, + "rewards/accuracies": 0.546875, + "rewards/chosen": -1.758380651473999, + "rewards/margins": 0.10633517056703568, + "rewards/rejected": -1.8647160530090332, + "step": 202 + }, + { + "epoch": 0.42920383217707303, + "grad_norm": 1.8622503280639648, + "learning_rate": 7.049464984141829e-07, + "logits/chosen": -0.5329294204711914, + "logits/rejected": -0.5523126721382141, + "logps/chosen": -0.695776104927063, + "logps/rejected": -0.8400713801383972, + "loss": 1.2285, + "rewards/accuracies": 0.6328125, + "rewards/chosen": -1.391552209854126, + "rewards/margins": 0.28859058022499084, + "rewards/rejected": -1.6801427602767944, + "step": 203 + }, + { + "epoch": 0.4313181367690783, + "grad_norm": 0.8603182435035706, + "learning_rate": 7.015617146439861e-07, + "logits/chosen": -0.4516752064228058, + "logits/rejected": -0.46907976269721985, + "logps/chosen": -0.6868133544921875, + "logps/rejected": -0.8646677732467651, + "loss": 1.2417, + "rewards/accuracies": 0.65625, + "rewards/chosen": -1.373626708984375, + "rewards/margins": 0.355709046125412, + "rewards/rejected": -1.7293355464935303, + "step": 204 + }, + { + "epoch": 0.43343244136108355, + "grad_norm": 0.6437748670578003, + "learning_rate": 6.981658652961546e-07, + "logits/chosen": -0.6159051656723022, + "logits/rejected": -0.6000130772590637, + "logps/chosen": -0.7715178728103638, + "logps/rejected": -0.8714219331741333, + "loss": 1.3469, + "rewards/accuracies": 0.5703125, + "rewards/chosen": -1.5430357456207275, + "rewards/margins": 0.19980813562870026, + "rewards/rejected": -1.7428438663482666, + "step": 205 + }, + { + "epoch": 0.43554674595308884, + "grad_norm": 1.2309322357177734, + "learning_rate": 6.947591368001137e-07, + "logits/chosen": -0.5913614630699158, + "logits/rejected": -0.6128537654876709, + "logps/chosen": -0.7512561678886414, + "logps/rejected": -0.8872793912887573, + "loss": 1.26, + "rewards/accuracies": 0.578125, + "rewards/chosen": -1.5025123357772827, + "rewards/margins": 0.2720465660095215, + "rewards/rejected": -1.7745587825775146, + "step": 206 + }, + { + "epoch": 0.43766105054509413, + "grad_norm": 0.6153685450553894, + "learning_rate": 6.913417161825449e-07, + "logits/chosen": -0.5976595878601074, + "logits/rejected": -0.6222202181816101, + "logps/chosen": -0.837669849395752, + "logps/rejected": -0.9835771918296814, + "loss": 1.2986, + "rewards/accuracies": 0.5859375, + "rewards/chosen": -1.675339698791504, + "rewards/margins": 0.2918146252632141, + "rewards/rejected": -1.9671543836593628, + "step": 207 + }, + { + "epoch": 0.4397753551370994, + "grad_norm": 1.9922760725021362, + "learning_rate": 6.87913791057119e-07, + "logits/chosen": -0.6808818578720093, + "logits/rejected": -0.6692708730697632, + "logps/chosen": -0.7088961601257324, + "logps/rejected": -0.8256410360336304, + "loss": 1.281, + "rewards/accuracies": 0.6015625, + "rewards/chosen": -1.4177923202514648, + "rewards/margins": 0.23348984122276306, + "rewards/rejected": -1.6512820720672607, + "step": 208 + }, + { + "epoch": 0.4418896597291047, + "grad_norm": 1.9562067985534668, + "learning_rate": 6.844755496141961e-07, + "logits/chosen": -0.5282632112503052, + "logits/rejected": -0.5692226886749268, + "logps/chosen": -0.7235382795333862, + "logps/rejected": -0.801092803478241, + "loss": 1.3227, + "rewards/accuracies": 0.53125, + "rewards/chosen": -1.4470765590667725, + "rewards/margins": 0.1551089584827423, + "rewards/rejected": -1.602185606956482, + "step": 209 + }, + { + "epoch": 0.44400396432111, + "grad_norm": 0.8182584047317505, + "learning_rate": 6.81027180610493e-07, + "logits/chosen": -0.6418904662132263, + "logits/rejected": -0.5941328406333923, + "logps/chosen": -0.820648729801178, + "logps/rejected": -0.8864803910255432, + "loss": 1.3498, + "rewards/accuracies": 0.625, + "rewards/chosen": -1.641297459602356, + "rewards/margins": 0.13166317343711853, + "rewards/rejected": -1.7729607820510864, + "step": 210 + }, + { + "epoch": 0.4461182689131153, + "grad_norm": 3.075260877609253, + "learning_rate": 6.775688733587227e-07, + "logits/chosen": -0.5926809906959534, + "logits/rejected": -0.5844541788101196, + "logps/chosen": -0.7822425365447998, + "logps/rejected": -0.8866626024246216, + "loss": 1.2884, + "rewards/accuracies": 0.5859375, + "rewards/chosen": -1.5644850730895996, + "rewards/margins": 0.20884013175964355, + "rewards/rejected": -1.7733252048492432, + "step": 211 + }, + { + "epoch": 0.4482325735051206, + "grad_norm": 0.8032744526863098, + "learning_rate": 6.741008177171993e-07, + "logits/chosen": -0.579971432685852, + "logits/rejected": -0.5978566408157349, + "logps/chosen": -0.721234917640686, + "logps/rejected": -0.8368514180183411, + "loss": 1.2781, + "rewards/accuracies": 0.6171875, + "rewards/chosen": -1.442469835281372, + "rewards/margins": 0.23123310506343842, + "rewards/rejected": -1.6737028360366821, + "step": 212 + }, + { + "epoch": 0.45034687809712587, + "grad_norm": 0.6680911779403687, + "learning_rate": 6.706232040794161e-07, + "logits/chosen": -0.6748596429824829, + "logits/rejected": -0.6615546941757202, + "logps/chosen": -0.7931480407714844, + "logps/rejected": -0.8879257440567017, + "loss": 1.337, + "rewards/accuracies": 0.546875, + "rewards/chosen": -1.5862960815429688, + "rewards/margins": 0.1895553171634674, + "rewards/rejected": -1.7758514881134033, + "step": 213 + }, + { + "epoch": 0.45246118268913116, + "grad_norm": 2.5107688903808594, + "learning_rate": 6.671362233635925e-07, + "logits/chosen": -0.6460363268852234, + "logits/rejected": -0.6273557543754578, + "logps/chosen": -0.823783814907074, + "logps/rejected": -0.87412428855896, + "loss": 1.3756, + "rewards/accuracies": 0.53125, + "rewards/chosen": -1.647567629814148, + "rewards/margins": 0.10068092495203018, + "rewards/rejected": -1.74824857711792, + "step": 214 + }, + { + "epoch": 0.45457548728113645, + "grad_norm": 2.2206740379333496, + "learning_rate": 6.636400670021933e-07, + "logits/chosen": -0.6295229196548462, + "logits/rejected": -0.6330893039703369, + "logps/chosen": -0.807812511920929, + "logps/rejected": -0.9784457683563232, + "loss": 1.2259, + "rewards/accuracies": 0.6484375, + "rewards/chosen": -1.615625023841858, + "rewards/margins": 0.3412665128707886, + "rewards/rejected": -1.9568915367126465, + "step": 215 + }, + { + "epoch": 0.45668979187314174, + "grad_norm": 1.2925803661346436, + "learning_rate": 6.601349269314187e-07, + "logits/chosen": -0.6001027822494507, + "logits/rejected": -0.6305864453315735, + "logps/chosen": -0.7216315865516663, + "logps/rejected": -0.8616191744804382, + "loss": 1.269, + "rewards/accuracies": 0.65625, + "rewards/chosen": -1.4432631731033325, + "rewards/margins": 0.2799749970436096, + "rewards/rejected": -1.7232383489608765, + "step": 216 + }, + { + "epoch": 0.458804096465147, + "grad_norm": 4.863992214202881, + "learning_rate": 6.566209955806679e-07, + "logits/chosen": -0.5307935476303101, + "logits/rejected": -0.5385264754295349, + "logps/chosen": -0.8053566813468933, + "logps/rejected": -0.9241464734077454, + "loss": 1.3325, + "rewards/accuracies": 0.546875, + "rewards/chosen": -1.6107133626937866, + "rewards/margins": 0.23757943511009216, + "rewards/rejected": -1.8482929468154907, + "step": 217 + }, + { + "epoch": 0.4609184010571523, + "grad_norm": 1.0189604759216309, + "learning_rate": 6.530984658619733e-07, + "logits/chosen": -0.7031885385513306, + "logits/rejected": -0.7072005867958069, + "logps/chosen": -0.8382629752159119, + "logps/rejected": -0.9468755722045898, + "loss": 1.3276, + "rewards/accuracies": 0.5546875, + "rewards/chosen": -1.6765259504318237, + "rewards/margins": 0.21722503006458282, + "rewards/rejected": -1.8937511444091797, + "step": 218 + }, + { + "epoch": 0.4630327056491576, + "grad_norm": 1.1178699731826782, + "learning_rate": 6.495675311594122e-07, + "logits/chosen": -0.5736142992973328, + "logits/rejected": -0.5926069021224976, + "logps/chosen": -0.7676032781600952, + "logps/rejected": -0.9179919958114624, + "loss": 1.278, + "rewards/accuracies": 0.6015625, + "rewards/chosen": -1.5352065563201904, + "rewards/margins": 0.3007773756980896, + "rewards/rejected": -1.8359839916229248, + "step": 219 + }, + { + "epoch": 0.4651470102411629, + "grad_norm": 2.4985287189483643, + "learning_rate": 6.460283853184879e-07, + "logits/chosen": -0.6372602581977844, + "logits/rejected": -0.6313104033470154, + "logps/chosen": -0.8754556179046631, + "logps/rejected": -0.9803894758224487, + "loss": 1.3166, + "rewards/accuracies": 0.6328125, + "rewards/chosen": -1.7509112358093262, + "rewards/margins": 0.2098677009344101, + "rewards/rejected": -1.9607789516448975, + "step": 220 + }, + { + "epoch": 0.46726131483316813, + "grad_norm": 1.5675435066223145, + "learning_rate": 6.424812226354889e-07, + "logits/chosen": -0.6377983093261719, + "logits/rejected": -0.6666730642318726, + "logps/chosen": -0.7556843757629395, + "logps/rejected": -0.9096466302871704, + "loss": 1.2397, + "rewards/accuracies": 0.6796875, + "rewards/chosen": -1.511368751525879, + "rewards/margins": 0.30792441964149475, + "rewards/rejected": -1.8192932605743408, + "step": 221 + }, + { + "epoch": 0.4693756194251734, + "grad_norm": 2.853426218032837, + "learning_rate": 6.389262378468219e-07, + "logits/chosen": -0.6055567860603333, + "logits/rejected": -0.612144947052002, + "logps/chosen": -0.8588352203369141, + "logps/rejected": -0.8928595185279846, + "loss": 1.4022, + "rewards/accuracies": 0.5078125, + "rewards/chosen": -1.7176704406738281, + "rewards/margins": 0.06804870069026947, + "rewards/rejected": -1.7857190370559692, + "step": 222 + }, + { + "epoch": 0.4714899240171787, + "grad_norm": 0.528042733669281, + "learning_rate": 6.353636261183213e-07, + "logits/chosen": -0.6543641090393066, + "logits/rejected": -0.6635830402374268, + "logps/chosen": -0.7858147621154785, + "logps/rejected": -0.9400445222854614, + "loss": 1.2446, + "rewards/accuracies": 0.640625, + "rewards/chosen": -1.571629524230957, + "rewards/margins": 0.3084595203399658, + "rewards/rejected": -1.8800890445709229, + "step": 223 + }, + { + "epoch": 0.473604228609184, + "grad_norm": 1.1155768632888794, + "learning_rate": 6.317935830345338e-07, + "logits/chosen": -0.5700349807739258, + "logits/rejected": -0.6560614705085754, + "logps/chosen": -0.8426170945167542, + "logps/rejected": -0.9983471035957336, + "loss": 1.3204, + "rewards/accuracies": 0.578125, + "rewards/chosen": -1.6852341890335083, + "rewards/margins": 0.3114599883556366, + "rewards/rejected": -1.9966942071914673, + "step": 224 + }, + { + "epoch": 0.4757185332011893, + "grad_norm": 0.802669107913971, + "learning_rate": 6.282163045879823e-07, + "logits/chosen": -0.6912901401519775, + "logits/rejected": -0.7201069593429565, + "logps/chosen": -0.8135342597961426, + "logps/rejected": -0.9537283182144165, + "loss": 1.2961, + "rewards/accuracies": 0.5859375, + "rewards/chosen": -1.6270685195922852, + "rewards/margins": 0.2803882658481598, + "rewards/rejected": -1.907456636428833, + "step": 225 + }, + { + "epoch": 0.4778328377931946, + "grad_norm": 1.709757924079895, + "learning_rate": 6.246319871684047e-07, + "logits/chosen": -0.7573816776275635, + "logits/rejected": -0.8028420209884644, + "logps/chosen": -0.891952633857727, + "logps/rejected": -1.0168029069900513, + "loss": 1.333, + "rewards/accuracies": 0.6015625, + "rewards/chosen": -1.783905267715454, + "rewards/margins": 0.24970072507858276, + "rewards/rejected": -2.0336058139801025, + "step": 226 + }, + { + "epoch": 0.47994714238519987, + "grad_norm": 2.170957326889038, + "learning_rate": 6.210408275519734e-07, + "logits/chosen": -0.6915597915649414, + "logits/rejected": -0.7027997970581055, + "logps/chosen": -0.9063036441802979, + "logps/rejected": -1.0104373693466187, + "loss": 1.3388, + "rewards/accuracies": 0.5390625, + "rewards/chosen": -1.8126072883605957, + "rewards/margins": 0.20826762914657593, + "rewards/rejected": -2.0208747386932373, + "step": 227 + }, + { + "epoch": 0.48206144697720515, + "grad_norm": 1.8802261352539062, + "learning_rate": 6.174430228904919e-07, + "logits/chosen": -0.689726710319519, + "logits/rejected": -0.7143282890319824, + "logps/chosen": -0.7480812072753906, + "logps/rejected": -0.8698041439056396, + "loss": 1.2836, + "rewards/accuracies": 0.6171875, + "rewards/chosen": -1.4961624145507812, + "rewards/margins": 0.24344584345817566, + "rewards/rejected": -1.7396082878112793, + "step": 228 + }, + { + "epoch": 0.48417575156921044, + "grad_norm": 2.5202934741973877, + "learning_rate": 6.13838770700571e-07, + "logits/chosen": -0.6858299374580383, + "logits/rejected": -0.7115206122398376, + "logps/chosen": -0.8575515151023865, + "logps/rejected": -0.9657347202301025, + "loss": 1.3046, + "rewards/accuracies": 0.546875, + "rewards/chosen": -1.715103030204773, + "rewards/margins": 0.21636635065078735, + "rewards/rejected": -1.931469440460205, + "step": 229 + }, + { + "epoch": 0.48629005616121573, + "grad_norm": 1.268512487411499, + "learning_rate": 6.102282688527859e-07, + "logits/chosen": -0.7078689932823181, + "logits/rejected": -0.7254161238670349, + "logps/chosen": -0.8850880861282349, + "logps/rejected": -1.031385898590088, + "loss": 1.2816, + "rewards/accuracies": 0.625, + "rewards/chosen": -1.7701761722564697, + "rewards/margins": 0.29259535670280457, + "rewards/rejected": -2.062771797180176, + "step": 230 + }, + { + "epoch": 0.488404360753221, + "grad_norm": 1.7285584211349487, + "learning_rate": 6.066117155608135e-07, + "logits/chosen": -0.7325868606567383, + "logits/rejected": -0.7433226108551025, + "logps/chosen": -0.8014956116676331, + "logps/rejected": -0.9653260111808777, + "loss": 1.2429, + "rewards/accuracies": 0.6328125, + "rewards/chosen": -1.6029912233352661, + "rewards/margins": 0.32766085863113403, + "rewards/rejected": -1.9306520223617554, + "step": 231 + }, + { + "epoch": 0.4905186653452263, + "grad_norm": 0.6270304322242737, + "learning_rate": 6.029893093705491e-07, + "logits/chosen": -0.692166805267334, + "logits/rejected": -0.6799293756484985, + "logps/chosen": -0.7850213646888733, + "logps/rejected": -0.8839574456214905, + "loss": 1.2967, + "rewards/accuracies": 0.640625, + "rewards/chosen": -1.5700427293777466, + "rewards/margins": 0.19787229597568512, + "rewards/rejected": -1.767914891242981, + "step": 232 + }, + { + "epoch": 0.4926329699372316, + "grad_norm": 1.0160484313964844, + "learning_rate": 5.993612491492087e-07, + "logits/chosen": -0.7095844149589539, + "logits/rejected": -0.71524578332901, + "logps/chosen": -0.7063854336738586, + "logps/rejected": -0.8855549097061157, + "loss": 1.2176, + "rewards/accuracies": 0.6328125, + "rewards/chosen": -1.4127708673477173, + "rewards/margins": 0.3583390712738037, + "rewards/rejected": -1.7711098194122314, + "step": 233 + }, + { + "epoch": 0.4947472745292369, + "grad_norm": 2.225841999053955, + "learning_rate": 5.957277340744094e-07, + "logits/chosen": -0.7488946318626404, + "logits/rejected": -0.7588428854942322, + "logps/chosen": -0.9203822612762451, + "logps/rejected": -1.0089298486709595, + "loss": 1.355, + "rewards/accuracies": 0.5390625, + "rewards/chosen": -1.8407645225524902, + "rewards/margins": 0.17709502577781677, + "rewards/rejected": -2.017859697341919, + "step": 234 + }, + { + "epoch": 0.4968615791212422, + "grad_norm": 1.9577795267105103, + "learning_rate": 5.920889636232351e-07, + "logits/chosen": -0.8078997731208801, + "logits/rejected": -0.8064825534820557, + "logps/chosen": -0.8004480004310608, + "logps/rejected": -0.9856831431388855, + "loss": 1.2273, + "rewards/accuracies": 0.6640625, + "rewards/chosen": -1.6008960008621216, + "rewards/margins": 0.3704703152179718, + "rewards/rejected": -1.971366286277771, + "step": 235 + }, + { + "epoch": 0.4989758837132474, + "grad_norm": 2.5050246715545654, + "learning_rate": 5.884451375612865e-07, + "logits/chosen": -0.7499472498893738, + "logits/rejected": -0.7421904802322388, + "logps/chosen": -0.8363584876060486, + "logps/rejected": -0.9543781876564026, + "loss": 1.3002, + "rewards/accuracies": 0.5390625, + "rewards/chosen": -1.6727169752120972, + "rewards/margins": 0.23603934049606323, + "rewards/rejected": -1.9087563753128052, + "step": 236 + }, + { + "epoch": 0.5010901883052528, + "grad_norm": 0.585436224937439, + "learning_rate": 5.847964559317128e-07, + "logits/chosen": -0.730015218257904, + "logits/rejected": -0.7154791355133057, + "logps/chosen": -0.8828849196434021, + "logps/rejected": -0.9897070527076721, + "loss": 1.347, + "rewards/accuracies": 0.578125, + "rewards/chosen": -1.7657698392868042, + "rewards/margins": 0.21364440023899078, + "rewards/rejected": -1.9794141054153442, + "step": 237 + }, + { + "epoch": 0.503204492897258, + "grad_norm": 0.9204092621803284, + "learning_rate": 5.8114311904423e-07, + "logits/chosen": -0.759974479675293, + "logits/rejected": -0.7793674468994141, + "logps/chosen": -0.8321584463119507, + "logps/rejected": -1.0809751749038696, + "loss": 1.2185, + "rewards/accuracies": 0.6875, + "rewards/chosen": -1.6643168926239014, + "rewards/margins": 0.4976334273815155, + "rewards/rejected": -2.1619503498077393, + "step": 238 + }, + { + "epoch": 0.5053187974892633, + "grad_norm": 5.147011756896973, + "learning_rate": 5.774853274641243e-07, + "logits/chosen": -0.7148956060409546, + "logits/rejected": -0.7363921403884888, + "logps/chosen": -0.8623124361038208, + "logps/rejected": -1.0681498050689697, + "loss": 1.2353, + "rewards/accuracies": 0.6484375, + "rewards/chosen": -1.7246248722076416, + "rewards/margins": 0.4116746187210083, + "rewards/rejected": -2.1362996101379395, + "step": 239 + }, + { + "epoch": 0.5074331020812686, + "grad_norm": 1.9065529108047485, + "learning_rate": 5.738232820012407e-07, + "logits/chosen": -0.7158540487289429, + "logits/rejected": -0.7083900570869446, + "logps/chosen": -0.981558620929718, + "logps/rejected": -1.054612636566162, + "loss": 1.3594, + "rewards/accuracies": 0.59375, + "rewards/chosen": -1.963117241859436, + "rewards/margins": 0.14610806107521057, + "rewards/rejected": -2.109225273132324, + "step": 240 + }, + { + "epoch": 0.5095474066732739, + "grad_norm": 2.4411256313323975, + "learning_rate": 5.701571836989591e-07, + "logits/chosen": -0.8441444039344788, + "logits/rejected": -0.8529233336448669, + "logps/chosen": -0.8665949702262878, + "logps/rejected": -1.030572772026062, + "loss": 1.2477, + "rewards/accuracies": 0.671875, + "rewards/chosen": -1.7331899404525757, + "rewards/margins": 0.3279556334018707, + "rewards/rejected": -2.061145544052124, + "step": 241 + }, + { + "epoch": 0.5116617112652792, + "grad_norm": 2.461113214492798, + "learning_rate": 5.664872338231571e-07, + "logits/chosen": -0.7463312149047852, + "logits/rejected": -0.7725105285644531, + "logps/chosen": -0.9185941815376282, + "logps/rejected": -1.1244423389434814, + "loss": 1.2404, + "rewards/accuracies": 0.6328125, + "rewards/chosen": -1.8371883630752563, + "rewards/margins": 0.411696195602417, + "rewards/rejected": -2.248884677886963, + "step": 242 + }, + { + "epoch": 0.5137760158572844, + "grad_norm": 3.5861761569976807, + "learning_rate": 5.628136338511607e-07, + "logits/chosen": -0.8432914018630981, + "logits/rejected": -0.85801100730896, + "logps/chosen": -0.8873915672302246, + "logps/rejected": -1.0090795755386353, + "loss": 1.3072, + "rewards/accuracies": 0.609375, + "rewards/chosen": -1.7747831344604492, + "rewards/margins": 0.24337637424468994, + "rewards/rejected": -2.0181591510772705, + "step": 243 + }, + { + "epoch": 0.5158903204492897, + "grad_norm": 2.109071969985962, + "learning_rate": 5.591365854606829e-07, + "logits/chosen": -0.7899532318115234, + "logits/rejected": -0.7548331618309021, + "logps/chosen": -0.9333330392837524, + "logps/rejected": -1.00949227809906, + "loss": 1.3749, + "rewards/accuracies": 0.59375, + "rewards/chosen": -1.8666660785675049, + "rewards/margins": 0.1523183286190033, + "rewards/rejected": -2.01898455619812, + "step": 244 + }, + { + "epoch": 0.518004625041295, + "grad_norm": 2.2017955780029297, + "learning_rate": 5.554562905187527e-07, + "logits/chosen": -0.7569047212600708, + "logits/rejected": -0.7679808735847473, + "logps/chosen": -0.9779613614082336, + "logps/rejected": -1.1713427305221558, + "loss": 1.2628, + "rewards/accuracies": 0.609375, + "rewards/chosen": -1.9559227228164673, + "rewards/margins": 0.3867628276348114, + "rewards/rejected": -2.3426854610443115, + "step": 245 + }, + { + "epoch": 0.5201189296333003, + "grad_norm": 4.651991367340088, + "learning_rate": 5.517729510706315e-07, + "logits/chosen": -0.8546395301818848, + "logits/rejected": -0.8609369397163391, + "logps/chosen": -0.9926605224609375, + "logps/rejected": -1.1553713083267212, + "loss": 1.2812, + "rewards/accuracies": 0.6171875, + "rewards/chosen": -1.985321044921875, + "rewards/margins": 0.32542160153388977, + "rewards/rejected": -2.3107426166534424, + "step": 246 + }, + { + "epoch": 0.5222332342253055, + "grad_norm": 2.6384060382843018, + "learning_rate": 5.480867693287223e-07, + "logits/chosen": -0.7734386324882507, + "logits/rejected": -0.7963250875473022, + "logps/chosen": -0.8996341824531555, + "logps/rejected": -1.0466523170471191, + "loss": 1.2849, + "rewards/accuracies": 0.578125, + "rewards/chosen": -1.799268364906311, + "rewards/margins": 0.2940361201763153, + "rewards/rejected": -2.0933046340942383, + "step": 247 + }, + { + "epoch": 0.5243475388173109, + "grad_norm": 1.3608977794647217, + "learning_rate": 5.443979476614674e-07, + "logits/chosen": -0.7350472807884216, + "logits/rejected": -0.7215992212295532, + "logps/chosen": -0.8887076377868652, + "logps/rejected": -1.0147045850753784, + "loss": 1.3182, + "rewards/accuracies": 0.6171875, + "rewards/chosen": -1.7774152755737305, + "rewards/margins": 0.25199398398399353, + "rewards/rejected": -2.029409170150757, + "step": 248 + }, + { + "epoch": 0.5264618434093161, + "grad_norm": 3.017115354537964, + "learning_rate": 5.407066885822391e-07, + "logits/chosen": -0.827782154083252, + "logits/rejected": -0.8471929430961609, + "logps/chosen": -0.9262440800666809, + "logps/rejected": -1.1658306121826172, + "loss": 1.1882, + "rewards/accuracies": 0.6640625, + "rewards/chosen": -1.8524881601333618, + "rewards/margins": 0.47917306423187256, + "rewards/rejected": -2.3316612243652344, + "step": 249 + }, + { + "epoch": 0.5285761480013215, + "grad_norm": 0.7805312275886536, + "learning_rate": 5.370131947382214e-07, + "logits/chosen": -0.7815499305725098, + "logits/rejected": -0.8279274702072144, + "logps/chosen": -0.968708872795105, + "logps/rejected": -1.2697322368621826, + "loss": 1.2092, + "rewards/accuracies": 0.671875, + "rewards/chosen": -1.93741774559021, + "rewards/margins": 0.6020466685295105, + "rewards/rejected": -2.5394644737243652, + "step": 250 + }, + { + "epoch": 0.5306904525933267, + "grad_norm": 2.229363441467285, + "learning_rate": 5.333176688992855e-07, + "logits/chosen": -0.7824153900146484, + "logits/rejected": -0.8154900074005127, + "logps/chosen": -1.0211957693099976, + "logps/rejected": -1.2145965099334717, + "loss": 1.3074, + "rewards/accuracies": 0.609375, + "rewards/chosen": -2.042391538619995, + "rewards/margins": 0.3868010938167572, + "rewards/rejected": -2.4291930198669434, + "step": 251 + }, + { + "epoch": 0.532804757185332, + "grad_norm": 1.1359837055206299, + "learning_rate": 5.296203139468571e-07, + "logits/chosen": -0.7467613220214844, + "logits/rejected": -0.7548531889915466, + "logps/chosen": -1.0614902973175049, + "logps/rejected": -1.2674376964569092, + "loss": 1.2512, + "rewards/accuracies": 0.578125, + "rewards/chosen": -2.1229805946350098, + "rewards/margins": 0.4118950664997101, + "rewards/rejected": -2.5348753929138184, + "step": 252 + }, + { + "epoch": 0.5349190617773373, + "grad_norm": 3.0548548698425293, + "learning_rate": 5.259213328627792e-07, + "logits/chosen": -0.7868636250495911, + "logits/rejected": -0.8130850791931152, + "logps/chosen": -1.0743666887283325, + "logps/rejected": -1.2010191679000854, + "loss": 1.3275, + "rewards/accuracies": 0.578125, + "rewards/chosen": -2.148733377456665, + "rewards/margins": 0.2533051669597626, + "rewards/rejected": -2.402038335800171, + "step": 253 + }, + { + "epoch": 0.5370333663693426, + "grad_norm": 1.7205246686935425, + "learning_rate": 5.222209287181676e-07, + "logits/chosen": -0.81404709815979, + "logits/rejected": -0.8481613397598267, + "logps/chosen": -1.1599587202072144, + "logps/rejected": -1.4234716892242432, + "loss": 1.2894, + "rewards/accuracies": 0.6015625, + "rewards/chosen": -2.3199174404144287, + "rewards/margins": 0.5270256400108337, + "rewards/rejected": -2.8469433784484863, + "step": 254 + }, + { + "epoch": 0.5391476709613479, + "grad_norm": 2.2516112327575684, + "learning_rate": 5.185193046622634e-07, + "logits/chosen": -0.8112510442733765, + "logits/rejected": -0.8310728073120117, + "logps/chosen": -1.1263186931610107, + "logps/rejected": -1.3256827592849731, + "loss": 1.3552, + "rewards/accuracies": 0.5390625, + "rewards/chosen": -2.2526373863220215, + "rewards/margins": 0.39872825145721436, + "rewards/rejected": -2.6513655185699463, + "step": 255 + }, + { + "epoch": 0.5412619755533532, + "grad_norm": 2.8379359245300293, + "learning_rate": 5.148166639112799e-07, + "logits/chosen": -0.8202102184295654, + "logits/rejected": -0.845209002494812, + "logps/chosen": -1.264180302619934, + "logps/rejected": -1.6190590858459473, + "loss": 1.2083, + "rewards/accuracies": 0.625, + "rewards/chosen": -2.528360605239868, + "rewards/margins": 0.7097575068473816, + "rewards/rejected": -3.2381181716918945, + "step": 256 + }, + { + "epoch": 0.5433762801453584, + "grad_norm": 4.676355838775635, + "learning_rate": 5.111132097372459e-07, + "logits/chosen": -0.8866451978683472, + "logits/rejected": -0.8642281889915466, + "logps/chosen": -1.3194389343261719, + "logps/rejected": -1.4506916999816895, + "loss": 1.4002, + "rewards/accuracies": 0.59375, + "rewards/chosen": -2.6388778686523438, + "rewards/margins": 0.2625058591365814, + "rewards/rejected": -2.901383399963379, + "step": 257 + }, + { + "epoch": 0.5454905847373638, + "grad_norm": 2.55251407623291, + "learning_rate": 5.074091454568463e-07, + "logits/chosen": -0.7903708815574646, + "logits/rejected": -0.8010709881782532, + "logps/chosen": -1.3550961017608643, + "logps/rejected": -1.661428451538086, + "loss": 1.2131, + "rewards/accuracies": 0.65625, + "rewards/chosen": -2.7101922035217285, + "rewards/margins": 0.6126645803451538, + "rewards/rejected": -3.322856903076172, + "step": 258 + }, + { + "epoch": 0.547604889329369, + "grad_norm": 4.116572856903076, + "learning_rate": 5.037046744202611e-07, + "logits/chosen": -0.7501232624053955, + "logits/rejected": -0.7825176119804382, + "logps/chosen": -1.2111856937408447, + "logps/rejected": -1.5176191329956055, + "loss": 1.1345, + "rewards/accuracies": 0.71875, + "rewards/chosen": -2.4223713874816895, + "rewards/margins": 0.6128667593002319, + "rewards/rejected": -3.035238265991211, + "step": 259 + }, + { + "epoch": 0.5497191939213742, + "grad_norm": 2.0285205841064453, + "learning_rate": 5e-07, + "logits/chosen": -0.8355445861816406, + "logits/rejected": -0.8497716784477234, + "logps/chosen": -1.1876304149627686, + "logps/rejected": -1.4788450002670288, + "loss": 1.1559, + "rewards/accuracies": 0.671875, + "rewards/chosen": -2.375260829925537, + "rewards/margins": 0.5824294090270996, + "rewards/rejected": -2.9576900005340576, + "step": 260 + }, + { + "epoch": 0.5518334985133796, + "grad_norm": 4.681185245513916, + "learning_rate": 4.962953255797389e-07, + "logits/chosen": -0.8240503072738647, + "logits/rejected": -0.8016488552093506, + "logps/chosen": -1.2238959074020386, + "logps/rejected": -1.4727882146835327, + "loss": 1.2914, + "rewards/accuracies": 0.5546875, + "rewards/chosen": -2.447791814804077, + "rewards/margins": 0.49778467416763306, + "rewards/rejected": -2.9455764293670654, + "step": 261 + }, + { + "epoch": 0.5539478031053848, + "grad_norm": 5.15679931640625, + "learning_rate": 4.925908545431537e-07, + "logits/chosen": -0.728940486907959, + "logits/rejected": -0.7355924248695374, + "logps/chosen": -1.3356778621673584, + "logps/rejected": -1.6726096868515015, + "loss": 1.1434, + "rewards/accuracies": 0.6953125, + "rewards/chosen": -2.671355724334717, + "rewards/margins": 0.6738637685775757, + "rewards/rejected": -3.345219373703003, + "step": 262 + }, + { + "epoch": 0.5560621076973902, + "grad_norm": 2.481048345565796, + "learning_rate": 4.888867902627543e-07, + "logits/chosen": -0.8311591148376465, + "logits/rejected": -0.8191719055175781, + "logps/chosen": -1.2743335962295532, + "logps/rejected": -1.5339927673339844, + "loss": 1.2164, + "rewards/accuracies": 0.6640625, + "rewards/chosen": -2.5486671924591064, + "rewards/margins": 0.5193185210227966, + "rewards/rejected": -3.0679855346679688, + "step": 263 + }, + { + "epoch": 0.5581764122893954, + "grad_norm": 3.6758291721343994, + "learning_rate": 4.851833360887201e-07, + "logits/chosen": -0.6787989735603333, + "logits/rejected": -0.668928325176239, + "logps/chosen": -1.2278664112091064, + "logps/rejected": -1.4955706596374512, + "loss": 1.1942, + "rewards/accuracies": 0.7109375, + "rewards/chosen": -2.455732822418213, + "rewards/margins": 0.535408616065979, + "rewards/rejected": -2.9911413192749023, + "step": 264 + }, + { + "epoch": 0.5602907168814008, + "grad_norm": 2.7282023429870605, + "learning_rate": 4.814806953377365e-07, + "logits/chosen": -0.7772133350372314, + "logits/rejected": -0.7689889073371887, + "logps/chosen": -1.1954048871994019, + "logps/rejected": -1.444943904876709, + "loss": 1.2686, + "rewards/accuracies": 0.59375, + "rewards/chosen": -2.3908097743988037, + "rewards/margins": 0.4990782141685486, + "rewards/rejected": -2.889887809753418, + "step": 265 + }, + { + "epoch": 0.562405021473406, + "grad_norm": 2.8753116130828857, + "learning_rate": 4.777790712818323e-07, + "logits/chosen": -0.6946043968200684, + "logits/rejected": -0.7001516819000244, + "logps/chosen": -1.2844620943069458, + "logps/rejected": -1.486103892326355, + "loss": 1.284, + "rewards/accuracies": 0.5703125, + "rewards/chosen": -2.5689241886138916, + "rewards/margins": 0.4032836854457855, + "rewards/rejected": -2.97220778465271, + "step": 266 + }, + { + "epoch": 0.5645193260654113, + "grad_norm": 1.5583593845367432, + "learning_rate": 4.740786671372209e-07, + "logits/chosen": -0.7396820187568665, + "logits/rejected": -0.7129873037338257, + "logps/chosen": -1.410097599029541, + "logps/rejected": -1.6091456413269043, + "loss": 1.3158, + "rewards/accuracies": 0.640625, + "rewards/chosen": -2.820195198059082, + "rewards/margins": 0.3980959951877594, + "rewards/rejected": -3.2182912826538086, + "step": 267 + }, + { + "epoch": 0.5666336306574166, + "grad_norm": 3.5984952449798584, + "learning_rate": 4.703796860531429e-07, + "logits/chosen": -0.7031830549240112, + "logits/rejected": -0.700330376625061, + "logps/chosen": -1.633664608001709, + "logps/rejected": -1.9186874628067017, + "loss": 1.2479, + "rewards/accuracies": 0.6796875, + "rewards/chosen": -3.267329216003418, + "rewards/margins": 0.5700456500053406, + "rewards/rejected": -3.8373749256134033, + "step": 268 + }, + { + "epoch": 0.5687479352494219, + "grad_norm": 6.295733451843262, + "learning_rate": 4.666823311007144e-07, + "logits/chosen": -0.8001950979232788, + "logits/rejected": -0.8042099475860596, + "logps/chosen": -1.5675832033157349, + "logps/rejected": -1.9247075319290161, + "loss": 1.1759, + "rewards/accuracies": 0.625, + "rewards/chosen": -3.1351664066314697, + "rewards/margins": 0.7142485976219177, + "rewards/rejected": -3.8494150638580322, + "step": 269 + }, + { + "epoch": 0.5708622398414271, + "grad_norm": 3.6349036693573, + "learning_rate": 4.6298680526177855e-07, + "logits/chosen": -0.8108068704605103, + "logits/rejected": -0.8030902147293091, + "logps/chosen": -1.8205997943878174, + "logps/rejected": -2.195197105407715, + "loss": 1.1864, + "rewards/accuracies": 0.6796875, + "rewards/chosen": -3.6411995887756348, + "rewards/margins": 0.7491948008537292, + "rewards/rejected": -4.39039421081543, + "step": 270 + }, + { + "epoch": 0.5729765444334325, + "grad_norm": 4.786395072937012, + "learning_rate": 4.59293311417761e-07, + "logits/chosen": -0.798182487487793, + "logits/rejected": -0.7736828327178955, + "logps/chosen": -1.8617057800292969, + "logps/rejected": -2.08984637260437, + "loss": 1.3947, + "rewards/accuracies": 0.6328125, + "rewards/chosen": -3.7234115600585938, + "rewards/margins": 0.4562810957431793, + "rewards/rejected": -4.17969274520874, + "step": 271 + }, + { + "epoch": 0.5750908490254377, + "grad_norm": 6.7946457862854, + "learning_rate": 4.556020523385326e-07, + "logits/chosen": -0.7530428171157837, + "logits/rejected": -0.7395590543746948, + "logps/chosen": -1.8709862232208252, + "logps/rejected": -2.3599390983581543, + "loss": 1.1025, + "rewards/accuracies": 0.6875, + "rewards/chosen": -3.7419724464416504, + "rewards/margins": 0.9779053926467896, + "rewards/rejected": -4.719878196716309, + "step": 272 + }, + { + "epoch": 0.5772051536174431, + "grad_norm": 4.877624988555908, + "learning_rate": 4.5191323067127773e-07, + "logits/chosen": -0.7732480764389038, + "logits/rejected": -0.7835702300071716, + "logps/chosen": -2.0340800285339355, + "logps/rejected": -2.330742835998535, + "loss": 1.3198, + "rewards/accuracies": 0.6328125, + "rewards/chosen": -4.068160057067871, + "rewards/margins": 0.5933258533477783, + "rewards/rejected": -4.66148567199707, + "step": 273 + }, + { + "epoch": 0.5793194582094483, + "grad_norm": 9.001680374145508, + "learning_rate": 4.482270489293685e-07, + "logits/chosen": -0.9062263369560242, + "logits/rejected": -0.9105854630470276, + "logps/chosen": -2.1364972591400146, + "logps/rejected": -2.4467523097991943, + "loss": 1.3464, + "rewards/accuracies": 0.6328125, + "rewards/chosen": -4.272994518280029, + "rewards/margins": 0.6205099821090698, + "rewards/rejected": -4.893504619598389, + "step": 274 + }, + { + "epoch": 0.5814337628014535, + "grad_norm": 2.811025619506836, + "learning_rate": 4.445437094812475e-07, + "logits/chosen": -0.8593579530715942, + "logits/rejected": -0.8343831896781921, + "logps/chosen": -2.452843189239502, + "logps/rejected": -2.7551848888397217, + "loss": 1.3536, + "rewards/accuracies": 0.5859375, + "rewards/chosen": -4.905686378479004, + "rewards/margins": 0.6046838760375977, + "rewards/rejected": -5.510369777679443, + "step": 275 + }, + { + "epoch": 0.5835480673934589, + "grad_norm": 2.2030158042907715, + "learning_rate": 4.4086341453931714e-07, + "logits/chosen": -0.8991417288780212, + "logits/rejected": -0.8766486644744873, + "logps/chosen": -2.30641508102417, + "logps/rejected": -2.7606654167175293, + "loss": 1.1708, + "rewards/accuracies": 0.6875, + "rewards/chosen": -4.61283016204834, + "rewards/margins": 0.9085015654563904, + "rewards/rejected": -5.521330833435059, + "step": 276 + }, + { + "epoch": 0.5856623719854641, + "grad_norm": 5.5185227394104, + "learning_rate": 4.371863661488393e-07, + "logits/chosen": -0.8738227486610413, + "logits/rejected": -0.8665530681610107, + "logps/chosen": -2.29125714302063, + "logps/rejected": -2.7014153003692627, + "loss": 1.1883, + "rewards/accuracies": 0.6484375, + "rewards/chosen": -4.58251428604126, + "rewards/margins": 0.8203167915344238, + "rewards/rejected": -5.402830600738525, + "step": 277 + }, + { + "epoch": 0.5877766765774695, + "grad_norm": 2.0779521465301514, + "learning_rate": 4.3351276617684285e-07, + "logits/chosen": -0.958415150642395, + "logits/rejected": -0.9585077166557312, + "logps/chosen": -2.4368410110473633, + "logps/rejected": -2.798506736755371, + "loss": 1.1749, + "rewards/accuracies": 0.6484375, + "rewards/chosen": -4.873682022094727, + "rewards/margins": 0.7233313322067261, + "rewards/rejected": -5.597013473510742, + "step": 278 + }, + { + "epoch": 0.5898909811694747, + "grad_norm": 2.884877920150757, + "learning_rate": 4.29842816301041e-07, + "logits/chosen": -0.9413051605224609, + "logits/rejected": -0.9224691987037659, + "logps/chosen": -2.485034942626953, + "logps/rejected": -2.911332368850708, + "loss": 1.2035, + "rewards/accuracies": 0.6171875, + "rewards/chosen": -4.970069885253906, + "rewards/margins": 0.8525944948196411, + "rewards/rejected": -5.822664737701416, + "step": 279 + }, + { + "epoch": 0.59200528576148, + "grad_norm": 5.203248500823975, + "learning_rate": 4.2617671799875944e-07, + "logits/chosen": -0.9359334111213684, + "logits/rejected": -0.9387660026550293, + "logps/chosen": -2.378349542617798, + "logps/rejected": -2.730886936187744, + "loss": 1.2253, + "rewards/accuracies": 0.7109375, + "rewards/chosen": -4.756699085235596, + "rewards/margins": 0.7050745487213135, + "rewards/rejected": -5.461773872375488, + "step": 280 + }, + { + "epoch": 0.5941195903534853, + "grad_norm": 6.818525314331055, + "learning_rate": 4.225146725358758e-07, + "logits/chosen": -0.8864554166793823, + "logits/rejected": -0.8813320398330688, + "logps/chosen": -2.4233975410461426, + "logps/rejected": -2.8188178539276123, + "loss": 1.2281, + "rewards/accuracies": 0.6640625, + "rewards/chosen": -4.846795082092285, + "rewards/margins": 0.7908411622047424, + "rewards/rejected": -5.637635707855225, + "step": 281 + }, + { + "epoch": 0.5962338949454906, + "grad_norm": 2.529154062271118, + "learning_rate": 4.1885688095577e-07, + "logits/chosen": -0.8420325517654419, + "logits/rejected": -0.8822402954101562, + "logps/chosen": -2.626488447189331, + "logps/rejected": -3.1887192726135254, + "loss": 1.0561, + "rewards/accuracies": 0.6953125, + "rewards/chosen": -5.252976894378662, + "rewards/margins": 1.1244611740112305, + "rewards/rejected": -6.377438545227051, + "step": 282 + }, + { + "epoch": 0.5983481995374959, + "grad_norm": 3.0739686489105225, + "learning_rate": 4.152035440682873e-07, + "logits/chosen": -0.8550993204116821, + "logits/rejected": -0.8528580665588379, + "logps/chosen": -2.6387887001037598, + "logps/rejected": -2.9952192306518555, + "loss": 1.3409, + "rewards/accuracies": 0.6015625, + "rewards/chosen": -5.2775774002075195, + "rewards/margins": 0.7128612399101257, + "rewards/rejected": -5.990438461303711, + "step": 283 + }, + { + "epoch": 0.6004625041295012, + "grad_norm": 3.6649062633514404, + "learning_rate": 4.1155486243871363e-07, + "logits/chosen": -0.8643282651901245, + "logits/rejected": -0.9175342321395874, + "logps/chosen": -2.929072618484497, + "logps/rejected": -3.105940580368042, + "loss": 1.5121, + "rewards/accuracies": 0.578125, + "rewards/chosen": -5.858145236968994, + "rewards/margins": 0.3537355065345764, + "rewards/rejected": -6.211881160736084, + "step": 284 + }, + { + "epoch": 0.6025768087215064, + "grad_norm": 2.5071723461151123, + "learning_rate": 4.0791103637676486e-07, + "logits/chosen": -0.8368631601333618, + "logits/rejected": -0.819808304309845, + "logps/chosen": -3.0672600269317627, + "logps/rejected": -3.4685003757476807, + "loss": 1.3236, + "rewards/accuracies": 0.65625, + "rewards/chosen": -6.134520053863525, + "rewards/margins": 0.8024805784225464, + "rewards/rejected": -6.937000751495361, + "step": 285 + }, + { + "epoch": 0.6046911133135118, + "grad_norm": 8.780280113220215, + "learning_rate": 4.042722659255906e-07, + "logits/chosen": -0.8249569535255432, + "logits/rejected": -0.8442113995552063, + "logps/chosen": -3.3199872970581055, + "logps/rejected": -3.7276291847229004, + "loss": 1.322, + "rewards/accuracies": 0.6796875, + "rewards/chosen": -6.639974594116211, + "rewards/margins": 0.8152831792831421, + "rewards/rejected": -7.455258369445801, + "step": 286 + }, + { + "epoch": 0.606805417905517, + "grad_norm": 3.4388678073883057, + "learning_rate": 4.006387508507914e-07, + "logits/chosen": -0.7224047780036926, + "logits/rejected": -0.7616450786590576, + "logps/chosen": -2.9411330223083496, + "logps/rejected": -3.32680082321167, + "loss": 1.2868, + "rewards/accuracies": 0.6640625, + "rewards/chosen": -5.882266044616699, + "rewards/margins": 0.7713361978530884, + "rewards/rejected": -6.65360164642334, + "step": 287 + }, + { + "epoch": 0.6089197224975224, + "grad_norm": 5.095273971557617, + "learning_rate": 3.970106906294509e-07, + "logits/chosen": -0.7394692897796631, + "logits/rejected": -0.7316830158233643, + "logps/chosen": -2.9902045726776123, + "logps/rejected": -3.469916820526123, + "loss": 1.1694, + "rewards/accuracies": 0.6953125, + "rewards/chosen": -5.980409145355225, + "rewards/margins": 0.9594244360923767, + "rewards/rejected": -6.939833641052246, + "step": 288 + }, + { + "epoch": 0.6110340270895276, + "grad_norm": 2.1398613452911377, + "learning_rate": 3.933882844391866e-07, + "logits/chosen": -0.8181312084197998, + "logits/rejected": -0.833306610584259, + "logps/chosen": -3.0137529373168945, + "logps/rejected": -3.4241840839385986, + "loss": 1.2453, + "rewards/accuracies": 0.609375, + "rewards/chosen": -6.027505874633789, + "rewards/margins": 0.8208625316619873, + "rewards/rejected": -6.848368167877197, + "step": 289 + }, + { + "epoch": 0.6131483316815328, + "grad_norm": 4.185284614562988, + "learning_rate": 3.89771731147214e-07, + "logits/chosen": -0.7805104851722717, + "logits/rejected": -0.8086984753608704, + "logps/chosen": -2.984957218170166, + "logps/rejected": -3.430112361907959, + "loss": 1.2671, + "rewards/accuracies": 0.5859375, + "rewards/chosen": -5.969914436340332, + "rewards/margins": 0.890310525894165, + "rewards/rejected": -6.860224723815918, + "step": 290 + }, + { + "epoch": 0.6152626362735382, + "grad_norm": 7.104829788208008, + "learning_rate": 3.861612292994292e-07, + "logits/chosen": -0.7788286209106445, + "logits/rejected": -0.8027424216270447, + "logps/chosen": -2.896563768386841, + "logps/rejected": -3.1082046031951904, + "loss": 1.4853, + "rewards/accuracies": 0.578125, + "rewards/chosen": -5.793127536773682, + "rewards/margins": 0.42328107357025146, + "rewards/rejected": -6.216409206390381, + "step": 291 + }, + { + "epoch": 0.6173769408655434, + "grad_norm": 3.795579433441162, + "learning_rate": 3.825569771095082e-07, + "logits/chosen": -0.8044757843017578, + "logits/rejected": -0.7828265428543091, + "logps/chosen": -2.8059256076812744, + "logps/rejected": -3.3121094703674316, + "loss": 1.1299, + "rewards/accuracies": 0.6875, + "rewards/chosen": -5.611851215362549, + "rewards/margins": 1.0123679637908936, + "rewards/rejected": -6.624218940734863, + "step": 292 + }, + { + "epoch": 0.6194912454575487, + "grad_norm": 4.486142158508301, + "learning_rate": 3.7895917244802655e-07, + "logits/chosen": -0.7511788606643677, + "logits/rejected": -0.7885503768920898, + "logps/chosen": -2.927251100540161, + "logps/rejected": -3.1605303287506104, + "loss": 1.426, + "rewards/accuracies": 0.6015625, + "rewards/chosen": -5.854502201080322, + "rewards/margins": 0.466558575630188, + "rewards/rejected": -6.321060657501221, + "step": 293 + }, + { + "epoch": 0.621605550049554, + "grad_norm": 3.3942787647247314, + "learning_rate": 3.753680128315952e-07, + "logits/chosen": -0.8230300545692444, + "logits/rejected": -0.8042524456977844, + "logps/chosen": -2.524353504180908, + "logps/rejected": -2.8687357902526855, + "loss": 1.2653, + "rewards/accuracies": 0.6640625, + "rewards/chosen": -5.048707008361816, + "rewards/margins": 0.6887640953063965, + "rewards/rejected": -5.737471580505371, + "step": 294 + }, + { + "epoch": 0.6237198546415593, + "grad_norm": 4.326812744140625, + "learning_rate": 3.717836954120178e-07, + "logits/chosen": -0.7763381004333496, + "logits/rejected": -0.7852378487586975, + "logps/chosen": -2.4861948490142822, + "logps/rejected": -2.8822267055511475, + "loss": 1.124, + "rewards/accuracies": 0.75, + "rewards/chosen": -4.9723896980285645, + "rewards/margins": 0.7920635938644409, + "rewards/rejected": -5.764453411102295, + "step": 295 + }, + { + "epoch": 0.6258341592335646, + "grad_norm": 3.886293649673462, + "learning_rate": 3.6820641696546627e-07, + "logits/chosen": -0.8350138664245605, + "logits/rejected": -0.8594292998313904, + "logps/chosen": -2.1301493644714355, + "logps/rejected": -2.3678014278411865, + "loss": 1.3532, + "rewards/accuracies": 0.5859375, + "rewards/chosen": -4.260298728942871, + "rewards/margins": 0.4753049314022064, + "rewards/rejected": -4.735602855682373, + "step": 296 + }, + { + "epoch": 0.6279484638255699, + "grad_norm": 1.9318888187408447, + "learning_rate": 3.6463637388167875e-07, + "logits/chosen": -0.812870979309082, + "logits/rejected": -0.8393633961677551, + "logps/chosen": -2.0607728958129883, + "logps/rejected": -2.4457521438598633, + "loss": 1.2317, + "rewards/accuracies": 0.609375, + "rewards/chosen": -4.121545791625977, + "rewards/margins": 0.76995849609375, + "rewards/rejected": -4.891504287719727, + "step": 297 + }, + { + "epoch": 0.6300627684175751, + "grad_norm": 2.731139898300171, + "learning_rate": 3.610737621531781e-07, + "logits/chosen": -0.7860711216926575, + "logits/rejected": -0.8006534576416016, + "logps/chosen": -1.9324530363082886, + "logps/rejected": -2.2838711738586426, + "loss": 1.2986, + "rewards/accuracies": 0.65625, + "rewards/chosen": -3.864906072616577, + "rewards/margins": 0.7028359174728394, + "rewards/rejected": -4.567742347717285, + "step": 298 + }, + { + "epoch": 0.6321770730095805, + "grad_norm": 3.118441581726074, + "learning_rate": 3.575187773645112e-07, + "logits/chosen": -0.6946629285812378, + "logits/rejected": -0.6832380294799805, + "logps/chosen": -2.2569775581359863, + "logps/rejected": -2.6153128147125244, + "loss": 1.2166, + "rewards/accuracies": 0.6171875, + "rewards/chosen": -4.513955116271973, + "rewards/margins": 0.7166703343391418, + "rewards/rejected": -5.230625629425049, + "step": 299 + }, + { + "epoch": 0.6342913776015857, + "grad_norm": 4.998100757598877, + "learning_rate": 3.5397161468151214e-07, + "logits/chosen": -0.7972643375396729, + "logits/rejected": -0.7864660620689392, + "logps/chosen": -2.227022886276245, + "logps/rejected": -2.57175350189209, + "loss": 1.2796, + "rewards/accuracies": 0.6328125, + "rewards/chosen": -4.45404577255249, + "rewards/margins": 0.6894608736038208, + "rewards/rejected": -5.14350700378418, + "step": 300 + }, + { + "epoch": 0.6364056821935911, + "grad_norm": 6.259451866149902, + "learning_rate": 3.5043246884058777e-07, + "logits/chosen": -0.6282143592834473, + "logits/rejected": -0.6314865350723267, + "logps/chosen": -2.4372308254241943, + "logps/rejected": -2.8582205772399902, + "loss": 1.1592, + "rewards/accuracies": 0.625, + "rewards/chosen": -4.874461650848389, + "rewards/margins": 0.8419792056083679, + "rewards/rejected": -5.7164411544799805, + "step": 301 + }, + { + "epoch": 0.6385199867855963, + "grad_norm": 2.577531337738037, + "learning_rate": 3.4690153413802653e-07, + "logits/chosen": -0.658220648765564, + "logits/rejected": -0.6330516934394836, + "logps/chosen": -2.6647050380706787, + "logps/rejected": -3.1917996406555176, + "loss": 1.2609, + "rewards/accuracies": 0.671875, + "rewards/chosen": -5.329410076141357, + "rewards/margins": 1.0541892051696777, + "rewards/rejected": -6.383599281311035, + "step": 302 + }, + { + "epoch": 0.6406342913776016, + "grad_norm": 4.733935356140137, + "learning_rate": 3.4337900441933227e-07, + "logits/chosen": -0.5048555731773376, + "logits/rejected": -0.45112305879592896, + "logps/chosen": -2.5193920135498047, + "logps/rejected": -3.1279971599578857, + "loss": 1.0648, + "rewards/accuracies": 0.765625, + "rewards/chosen": -5.038784027099609, + "rewards/margins": 1.2172104120254517, + "rewards/rejected": -6.2559943199157715, + "step": 303 + }, + { + "epoch": 0.6427485959696069, + "grad_norm": 5.54962158203125, + "learning_rate": 3.3986507306858125e-07, + "logits/chosen": -0.5305406451225281, + "logits/rejected": -0.5246613025665283, + "logps/chosen": -2.8851962089538574, + "logps/rejected": -3.248018264770508, + "loss": 1.4329, + "rewards/accuracies": 0.625, + "rewards/chosen": -5.770392417907715, + "rewards/margins": 0.7256444692611694, + "rewards/rejected": -6.496036529541016, + "step": 304 + }, + { + "epoch": 0.6448629005616121, + "grad_norm": 2.827944278717041, + "learning_rate": 3.363599329978066e-07, + "logits/chosen": -0.4795135259628296, + "logits/rejected": -0.4911767244338989, + "logps/chosen": -3.0268373489379883, + "logps/rejected": -3.4411511421203613, + "loss": 1.4083, + "rewards/accuracies": 0.65625, + "rewards/chosen": -6.053674697875977, + "rewards/margins": 0.8286278247833252, + "rewards/rejected": -6.882302284240723, + "step": 305 + }, + { + "epoch": 0.6469772051536175, + "grad_norm": 5.35672664642334, + "learning_rate": 3.328637766364075e-07, + "logits/chosen": -0.4823904037475586, + "logits/rejected": -0.48555058240890503, + "logps/chosen": -2.990793466567993, + "logps/rejected": -3.529240846633911, + "loss": 1.1417, + "rewards/accuracies": 0.6796875, + "rewards/chosen": -5.981586933135986, + "rewards/margins": 1.0768945217132568, + "rewards/rejected": -7.058481693267822, + "step": 306 + }, + { + "epoch": 0.6490915097456227, + "grad_norm": 2.8072359561920166, + "learning_rate": 3.2937679592058396e-07, + "logits/chosen": -0.4903571605682373, + "logits/rejected": -0.46411609649658203, + "logps/chosen": -2.8665530681610107, + "logps/rejected": -3.542123556137085, + "loss": 1.2485, + "rewards/accuracies": 0.6640625, + "rewards/chosen": -5.7331061363220215, + "rewards/margins": 1.3511409759521484, + "rewards/rejected": -7.08424711227417, + "step": 307 + }, + { + "epoch": 0.651205814337628, + "grad_norm": 6.341434478759766, + "learning_rate": 3.2589918228280066e-07, + "logits/chosen": -0.4496378004550934, + "logits/rejected": -0.35389459133148193, + "logps/chosen": -2.8208916187286377, + "logps/rejected": -3.326601505279541, + "loss": 1.3089, + "rewards/accuracies": 0.6171875, + "rewards/chosen": -5.641783237457275, + "rewards/margins": 1.011419653892517, + "rewards/rejected": -6.653203010559082, + "step": 308 + }, + { + "epoch": 0.6533201189296333, + "grad_norm": 2.5416784286499023, + "learning_rate": 3.2243112664127723e-07, + "logits/chosen": -0.44504135847091675, + "logits/rejected": -0.42088568210601807, + "logps/chosen": -2.7710533142089844, + "logps/rejected": -3.4406185150146484, + "loss": 1.2213, + "rewards/accuracies": 0.6640625, + "rewards/chosen": -5.542106628417969, + "rewards/margins": 1.3391309976577759, + "rewards/rejected": -6.881237030029297, + "step": 309 + }, + { + "epoch": 0.6554344235216386, + "grad_norm": 4.573229789733887, + "learning_rate": 3.189728193895069e-07, + "logits/chosen": -0.31100764870643616, + "logits/rejected": -0.32552966475486755, + "logps/chosen": -3.099289655685425, + "logps/rejected": -3.5152204036712646, + "loss": 1.3571, + "rewards/accuracies": 0.6015625, + "rewards/chosen": -6.19857931137085, + "rewards/margins": 0.8318620324134827, + "rewards/rejected": -7.030440807342529, + "step": 310 + }, + { + "epoch": 0.6575487281136438, + "grad_norm": 3.7587928771972656, + "learning_rate": 3.155244503858041e-07, + "logits/chosen": -0.4225979447364807, + "logits/rejected": -0.43882372975349426, + "logps/chosen": -2.9082608222961426, + "logps/rejected": -3.2239482402801514, + "loss": 1.3415, + "rewards/accuracies": 0.6953125, + "rewards/chosen": -5.816521644592285, + "rewards/margins": 0.6313749551773071, + "rewards/rejected": -6.447896480560303, + "step": 311 + }, + { + "epoch": 0.6596630327056492, + "grad_norm": 5.79728889465332, + "learning_rate": 3.12086208942881e-07, + "logits/chosen": -0.48076939582824707, + "logits/rejected": -0.41990721225738525, + "logps/chosen": -2.7089650630950928, + "logps/rejected": -3.29990291595459, + "loss": 1.1423, + "rewards/accuracies": 0.6640625, + "rewards/chosen": -5.4179301261901855, + "rewards/margins": 1.181876540184021, + "rewards/rejected": -6.59980583190918, + "step": 312 + }, + { + "epoch": 0.6617773372976544, + "grad_norm": 7.405224800109863, + "learning_rate": 3.086582838174551e-07, + "logits/chosen": -0.48003631830215454, + "logits/rejected": -0.40571871399879456, + "logps/chosen": -2.53741455078125, + "logps/rejected": -3.0145747661590576, + "loss": 1.3247, + "rewards/accuracies": 0.609375, + "rewards/chosen": -5.0748291015625, + "rewards/margins": 0.9543203115463257, + "rewards/rejected": -6.029149532318115, + "step": 313 + }, + { + "epoch": 0.6638916418896598, + "grad_norm": 6.371465682983398, + "learning_rate": 3.052408631998863e-07, + "logits/chosen": -0.42537638545036316, + "logits/rejected": -0.39384835958480835, + "logps/chosen": -3.006593942642212, + "logps/rejected": -3.4665465354919434, + "loss": 1.2648, + "rewards/accuracies": 0.6328125, + "rewards/chosen": -6.013187885284424, + "rewards/margins": 0.919904887676239, + "rewards/rejected": -6.933093070983887, + "step": 314 + }, + { + "epoch": 0.666005946481665, + "grad_norm": 4.65411376953125, + "learning_rate": 3.018341347038453e-07, + "logits/chosen": -0.38848310708999634, + "logits/rejected": -0.3435167670249939, + "logps/chosen": -2.9562084674835205, + "logps/rejected": -3.5491316318511963, + "loss": 1.1353, + "rewards/accuracies": 0.6796875, + "rewards/chosen": -5.912416934967041, + "rewards/margins": 1.1858452558517456, + "rewards/rejected": -7.098263263702393, + "step": 315 + }, + { + "epoch": 0.6681202510736703, + "grad_norm": 5.089771747589111, + "learning_rate": 2.9843828535601397e-07, + "logits/chosen": -0.3452882170677185, + "logits/rejected": -0.29303884506225586, + "logps/chosen": -2.5367987155914307, + "logps/rejected": -3.172724723815918, + "loss": 1.2002, + "rewards/accuracies": 0.6875, + "rewards/chosen": -5.073597431182861, + "rewards/margins": 1.2718524932861328, + "rewards/rejected": -6.345449447631836, + "step": 316 + }, + { + "epoch": 0.6702345556656756, + "grad_norm": 4.480255603790283, + "learning_rate": 2.9505350158581697e-07, + "logits/chosen": -0.47401517629623413, + "logits/rejected": -0.45950815081596375, + "logps/chosen": -2.45076322555542, + "logps/rejected": -2.998079299926758, + "loss": 1.2545, + "rewards/accuracies": 0.6171875, + "rewards/chosen": -4.90152645111084, + "rewards/margins": 1.0946320295333862, + "rewards/rejected": -5.996158599853516, + "step": 317 + }, + { + "epoch": 0.6723488602576809, + "grad_norm": 3.6318399906158447, + "learning_rate": 2.916799692151884e-07, + "logits/chosen": -0.20774951577186584, + "logits/rejected": -0.21114808320999146, + "logps/chosen": -2.8932981491088867, + "logps/rejected": -3.613022565841675, + "loss": 1.1187, + "rewards/accuracies": 0.6953125, + "rewards/chosen": -5.786596298217773, + "rewards/margins": 1.4394491910934448, + "rewards/rejected": -7.22604513168335, + "step": 318 + }, + { + "epoch": 0.6744631648496862, + "grad_norm": 6.601771831512451, + "learning_rate": 2.883178734483692e-07, + "logits/chosen": -0.3821495473384857, + "logits/rejected": -0.35181915760040283, + "logps/chosen": -2.5047662258148193, + "logps/rejected": -3.074918270111084, + "loss": 1.1545, + "rewards/accuracies": 0.71875, + "rewards/chosen": -5.009532451629639, + "rewards/margins": 1.1403042078018188, + "rewards/rejected": -6.149836540222168, + "step": 319 + }, + { + "epoch": 0.6765774694416914, + "grad_norm": 3.077775716781616, + "learning_rate": 2.849673988617399e-07, + "logits/chosen": -0.4517952799797058, + "logits/rejected": -0.3880998194217682, + "logps/chosen": -2.5404443740844727, + "logps/rejected": -3.007855176925659, + "loss": 1.2441, + "rewards/accuracies": 0.6484375, + "rewards/chosen": -5.080888748168945, + "rewards/margins": 0.9348208904266357, + "rewards/rejected": -6.015710353851318, + "step": 320 + }, + { + "epoch": 0.6786917740336967, + "grad_norm": 4.130971908569336, + "learning_rate": 2.8162872939368674e-07, + "logits/chosen": -0.3455219566822052, + "logits/rejected": -0.3199109137058258, + "logps/chosen": -2.5115320682525635, + "logps/rejected": -3.0809438228607178, + "loss": 1.1814, + "rewards/accuracies": 0.71875, + "rewards/chosen": -5.023064136505127, + "rewards/margins": 1.1388237476348877, + "rewards/rejected": -6.1618876457214355, + "step": 321 + }, + { + "epoch": 0.680806078625702, + "grad_norm": 6.414750099182129, + "learning_rate": 2.783020483345057e-07, + "logits/chosen": -0.500693142414093, + "logits/rejected": -0.43053722381591797, + "logps/chosen": -2.627499580383301, + "logps/rejected": -3.176882266998291, + "loss": 1.2207, + "rewards/accuracies": 0.6015625, + "rewards/chosen": -5.254999160766602, + "rewards/margins": 1.0987658500671387, + "rewards/rejected": -6.353764533996582, + "step": 322 + }, + { + "epoch": 0.6829203832177073, + "grad_norm": 3.8955185413360596, + "learning_rate": 2.749875383163377e-07, + "logits/chosen": -0.3386150896549225, + "logits/rejected": -0.3456903100013733, + "logps/chosen": -2.5545601844787598, + "logps/rejected": -3.0574111938476562, + "loss": 1.2667, + "rewards/accuracies": 0.6796875, + "rewards/chosen": -5.1091203689575195, + "rewards/margins": 1.0057018995285034, + "rewards/rejected": -6.1148223876953125, + "step": 323 + }, + { + "epoch": 0.6850346878097126, + "grad_norm": 4.244959831237793, + "learning_rate": 2.7168538130314345e-07, + "logits/chosen": -0.4657687246799469, + "logits/rejected": -0.41878795623779297, + "logps/chosen": -2.3406989574432373, + "logps/rejected": -2.74613094329834, + "loss": 1.2982, + "rewards/accuracies": 0.640625, + "rewards/chosen": -4.681397914886475, + "rewards/margins": 0.8108637928962708, + "rewards/rejected": -5.49226188659668, + "step": 324 + }, + { + "epoch": 0.6871489924017179, + "grad_norm": 8.914139747619629, + "learning_rate": 2.683957585807136e-07, + "logits/chosen": -0.42120760679244995, + "logits/rejected": -0.34997111558914185, + "logps/chosen": -2.4362924098968506, + "logps/rejected": -2.8844237327575684, + "loss": 1.3185, + "rewards/accuracies": 0.6484375, + "rewards/chosen": -4.872584819793701, + "rewards/margins": 0.8962627649307251, + "rewards/rejected": -5.768847465515137, + "step": 325 + }, + { + "epoch": 0.6892632969937231, + "grad_norm": 2.8318073749542236, + "learning_rate": 2.651188507467161e-07, + "logits/chosen": -0.4435175657272339, + "logits/rejected": -0.43688836693763733, + "logps/chosen": -2.316673994064331, + "logps/rejected": -2.6802306175231934, + "loss": 1.2727, + "rewards/accuracies": 0.65625, + "rewards/chosen": -4.633347988128662, + "rewards/margins": 0.7271134853363037, + "rewards/rejected": -5.360461235046387, + "step": 326 + }, + { + "epoch": 0.6913776015857285, + "grad_norm": 9.15845012664795, + "learning_rate": 2.618548377007817e-07, + "logits/chosen": -0.4659804105758667, + "logits/rejected": -0.43525823950767517, + "logps/chosen": -2.3177073001861572, + "logps/rejected": -2.674837350845337, + "loss": 1.3204, + "rewards/accuracies": 0.65625, + "rewards/chosen": -4.6354146003723145, + "rewards/margins": 0.7142605781555176, + "rewards/rejected": -5.349674701690674, + "step": 327 + }, + { + "epoch": 0.6934919061777337, + "grad_norm": 8.41653060913086, + "learning_rate": 2.5860389863462763e-07, + "logits/chosen": -0.42244386672973633, + "logits/rejected": -0.3488731384277344, + "logps/chosen": -2.3063669204711914, + "logps/rejected": -2.8124496936798096, + "loss": 1.2621, + "rewards/accuracies": 0.671875, + "rewards/chosen": -4.612733840942383, + "rewards/margins": 1.0121653079986572, + "rewards/rejected": -5.624899387359619, + "step": 328 + }, + { + "epoch": 0.695606210769739, + "grad_norm": 8.558746337890625, + "learning_rate": 2.5536621202221986e-07, + "logits/chosen": -0.4081762433052063, + "logits/rejected": -0.3913821578025818, + "logps/chosen": -2.331026554107666, + "logps/rejected": -2.799482583999634, + "loss": 1.2435, + "rewards/accuracies": 0.59375, + "rewards/chosen": -4.662053108215332, + "rewards/margins": 0.9369123578071594, + "rewards/rejected": -5.598965167999268, + "step": 329 + }, + { + "epoch": 0.6977205153617443, + "grad_norm": 7.550137519836426, + "learning_rate": 2.521419556099754e-07, + "logits/chosen": -0.5334538221359253, + "logits/rejected": -0.5046267509460449, + "logps/chosen": -2.3662197589874268, + "logps/rejected": -2.8178446292877197, + "loss": 1.2172, + "rewards/accuracies": 0.6796875, + "rewards/chosen": -4.7324395179748535, + "rewards/margins": 0.9032500386238098, + "rewards/rejected": -5.6356892585754395, + "step": 330 + }, + { + "epoch": 0.6998348199537496, + "grad_norm": 4.939478397369385, + "learning_rate": 2.4893130640700364e-07, + "logits/chosen": -0.5103824138641357, + "logits/rejected": -0.49076637625694275, + "logps/chosen": -2.0302557945251465, + "logps/rejected": -2.4443471431732178, + "loss": 1.1939, + "rewards/accuracies": 0.65625, + "rewards/chosen": -4.060511589050293, + "rewards/margins": 0.8281831741333008, + "rewards/rejected": -4.8886942863464355, + "step": 331 + }, + { + "epoch": 0.7019491245457549, + "grad_norm": 5.584677219390869, + "learning_rate": 2.4573444067538985e-07, + "logits/chosen": -0.46035417914390564, + "logits/rejected": -0.4546043574810028, + "logps/chosen": -2.1907548904418945, + "logps/rejected": -2.4913454055786133, + "loss": 1.4253, + "rewards/accuracies": 0.6015625, + "rewards/chosen": -4.381509780883789, + "rewards/margins": 0.6011807322502136, + "rewards/rejected": -4.982690811157227, + "step": 332 + }, + { + "epoch": 0.7040634291377602, + "grad_norm": 3.398441791534424, + "learning_rate": 2.425515339205165e-07, + "logits/chosen": -0.5569466352462769, + "logits/rejected": -0.5756793022155762, + "logps/chosen": -2.037411689758301, + "logps/rejected": -2.3700244426727295, + "loss": 1.3425, + "rewards/accuracies": 0.5703125, + "rewards/chosen": -4.074823379516602, + "rewards/margins": 0.6652255654335022, + "rewards/rejected": -4.740048885345459, + "step": 333 + }, + { + "epoch": 0.7061777337297654, + "grad_norm": 8.54529094696045, + "learning_rate": 2.3938276088143e-07, + "logits/chosen": -0.5746757388114929, + "logits/rejected": -0.5874296426773071, + "logps/chosen": -2.1479601860046387, + "logps/rejected": -2.584625244140625, + "loss": 1.2366, + "rewards/accuracies": 0.71875, + "rewards/chosen": -4.295920372009277, + "rewards/margins": 0.8733301758766174, + "rewards/rejected": -5.16925048828125, + "step": 334 + }, + { + "epoch": 0.7082920383217707, + "grad_norm": 5.141815662384033, + "learning_rate": 2.362282955212473e-07, + "logits/chosen": -0.6492913961410522, + "logits/rejected": -0.5812432765960693, + "logps/chosen": -1.9753435850143433, + "logps/rejected": -2.340383768081665, + "loss": 1.2197, + "rewards/accuracies": 0.640625, + "rewards/chosen": -3.9506871700286865, + "rewards/margins": 0.7300805449485779, + "rewards/rejected": -4.68076753616333, + "step": 335 + }, + { + "epoch": 0.710406342913776, + "grad_norm": 5.991698265075684, + "learning_rate": 2.3308831101760483e-07, + "logits/chosen": -0.6887751221656799, + "logits/rejected": -0.6923843622207642, + "logps/chosen": -1.577715277671814, + "logps/rejected": -1.861379623413086, + "loss": 1.2608, + "rewards/accuracies": 0.640625, + "rewards/chosen": -3.155430555343628, + "rewards/margins": 0.5673283338546753, + "rewards/rejected": -3.722759246826172, + "step": 336 + }, + { + "epoch": 0.7125206475057813, + "grad_norm": 1.5719850063323975, + "learning_rate": 2.2996297975315097e-07, + "logits/chosen": -0.6095813512802124, + "logits/rejected": -0.5842909216880798, + "logps/chosen": -1.6973541975021362, + "logps/rejected": -2.1261086463928223, + "loss": 1.2424, + "rewards/accuracies": 0.609375, + "rewards/chosen": -3.3947083950042725, + "rewards/margins": 0.857509195804596, + "rewards/rejected": -4.2522172927856445, + "step": 337 + }, + { + "epoch": 0.7146349520977866, + "grad_norm": 4.785243511199951, + "learning_rate": 2.2685247330608414e-07, + "logits/chosen": -0.7062411308288574, + "logits/rejected": -0.6849475502967834, + "logps/chosen": -1.6659798622131348, + "logps/rejected": -1.980202555656433, + "loss": 1.2512, + "rewards/accuracies": 0.6171875, + "rewards/chosen": -3.3319597244262695, + "rewards/margins": 0.6284454464912415, + "rewards/rejected": -3.960405111312866, + "step": 338 + }, + { + "epoch": 0.7167492566897918, + "grad_norm": 4.3183674812316895, + "learning_rate": 2.2375696244073123e-07, + "logits/chosen": -0.6655697822570801, + "logits/rejected": -0.6642571687698364, + "logps/chosen": -1.615012764930725, + "logps/rejected": -1.9022549390792847, + "loss": 1.398, + "rewards/accuracies": 0.625, + "rewards/chosen": -3.23002552986145, + "rewards/margins": 0.5744845867156982, + "rewards/rejected": -3.8045098781585693, + "step": 339 + }, + { + "epoch": 0.7188635612817972, + "grad_norm": 3.458740472793579, + "learning_rate": 2.2067661709817382e-07, + "logits/chosen": -0.6138105988502502, + "logits/rejected": -0.6241220235824585, + "logps/chosen": -1.5244299173355103, + "logps/rejected": -1.8252849578857422, + "loss": 1.2257, + "rewards/accuracies": 0.5859375, + "rewards/chosen": -3.0488598346710205, + "rewards/margins": 0.6017097234725952, + "rewards/rejected": -3.6505699157714844, + "step": 340 + }, + { + "epoch": 0.7209778658738024, + "grad_norm": 3.3990859985351562, + "learning_rate": 2.1761160638691838e-07, + "logits/chosen": -0.596839964389801, + "logits/rejected": -0.5929630398750305, + "logps/chosen": -1.4333155155181885, + "logps/rejected": -1.820554494857788, + "loss": 1.1124, + "rewards/accuracies": 0.6953125, + "rewards/chosen": -2.866631031036377, + "rewards/margins": 0.7744779586791992, + "rewards/rejected": -3.641108989715576, + "step": 341 + }, + { + "epoch": 0.7230921704658078, + "grad_norm": 2.742016315460205, + "learning_rate": 2.1456209857361246e-07, + "logits/chosen": -0.6483213901519775, + "logits/rejected": -0.6418218612670898, + "logps/chosen": -1.4174959659576416, + "logps/rejected": -1.831233263015747, + "loss": 1.1372, + "rewards/accuracies": 0.703125, + "rewards/chosen": -2.834991931915283, + "rewards/margins": 0.8274745941162109, + "rewards/rejected": -3.662466526031494, + "step": 342 + }, + { + "epoch": 0.725206475057813, + "grad_norm": 2.5489015579223633, + "learning_rate": 2.1152826107380651e-07, + "logits/chosen": -0.599895179271698, + "logits/rejected": -0.6154446005821228, + "logps/chosen": -1.4996072053909302, + "logps/rejected": -1.7961615324020386, + "loss": 1.2288, + "rewards/accuracies": 0.6640625, + "rewards/chosen": -2.9992144107818604, + "rewards/margins": 0.5931087732315063, + "rewards/rejected": -3.592323064804077, + "step": 343 + }, + { + "epoch": 0.7273207796498183, + "grad_norm": 2.8836190700531006, + "learning_rate": 2.0851026044276405e-07, + "logits/chosen": -0.7359989285469055, + "logits/rejected": -0.7111036777496338, + "logps/chosen": -1.32615065574646, + "logps/rejected": -1.6067696809768677, + "loss": 1.2088, + "rewards/accuracies": 0.65625, + "rewards/chosen": -2.65230131149292, + "rewards/margins": 0.5612384080886841, + "rewards/rejected": -3.2135393619537354, + "step": 344 + }, + { + "epoch": 0.7294350842418236, + "grad_norm": 3.1838135719299316, + "learning_rate": 2.0550826236631596e-07, + "logits/chosen": -0.6709272265434265, + "logits/rejected": -0.6708023548126221, + "logps/chosen": -1.2859303951263428, + "logps/rejected": -1.6929675340652466, + "loss": 1.1446, + "rewards/accuracies": 0.6875, + "rewards/chosen": -2.5718607902526855, + "rewards/margins": 0.8140743374824524, + "rewards/rejected": -3.385935068130493, + "step": 345 + }, + { + "epoch": 0.7315493888338289, + "grad_norm": 2.4209675788879395, + "learning_rate": 2.025224316517663e-07, + "logits/chosen": -0.7540403604507446, + "logits/rejected": -0.7601196765899658, + "logps/chosen": -1.3634543418884277, + "logps/rejected": -1.6112797260284424, + "loss": 1.2561, + "rewards/accuracies": 0.625, + "rewards/chosen": -2.7269086837768555, + "rewards/margins": 0.4956510066986084, + "rewards/rejected": -3.2225594520568848, + "step": 346 + }, + { + "epoch": 0.7336636934258342, + "grad_norm": 5.405437469482422, + "learning_rate": 1.9955293221884402e-07, + "logits/chosen": -0.7241419553756714, + "logits/rejected": -0.7224253416061401, + "logps/chosen": -1.2650585174560547, + "logps/rejected": -1.639666199684143, + "loss": 1.1565, + "rewards/accuracies": 0.6875, + "rewards/chosen": -2.5301170349121094, + "rewards/margins": 0.7492151856422424, + "rewards/rejected": -3.279332399368286, + "step": 347 + }, + { + "epoch": 0.7357779980178395, + "grad_norm": 1.5863631963729858, + "learning_rate": 1.9659992709070344e-07, + "logits/chosen": -0.7479431629180908, + "logits/rejected": -0.7219806909561157, + "logps/chosen": -1.294840931892395, + "logps/rejected": -1.6082017421722412, + "loss": 1.1693, + "rewards/accuracies": 0.640625, + "rewards/chosen": -2.58968186378479, + "rewards/margins": 0.6267215013504028, + "rewards/rejected": -3.2164034843444824, + "step": 348 + }, + { + "epoch": 0.7378923026098447, + "grad_norm": 1.7051454782485962, + "learning_rate": 1.936635783849742e-07, + "logits/chosen": -0.6940132975578308, + "logits/rejected": -0.7377297878265381, + "logps/chosen": -1.1897408962249756, + "logps/rejected": -1.631073236465454, + "loss": 1.1069, + "rewards/accuracies": 0.6953125, + "rewards/chosen": -2.379481792449951, + "rewards/margins": 0.8826643228530884, + "rewards/rejected": -3.262146472930908, + "step": 349 + }, + { + "epoch": 0.74000660720185, + "grad_norm": 2.704514980316162, + "learning_rate": 1.907440473048626e-07, + "logits/chosen": -0.6926394104957581, + "logits/rejected": -0.7064180374145508, + "logps/chosen": -1.1691362857818604, + "logps/rejected": -1.511006236076355, + "loss": 1.1541, + "rewards/accuracies": 0.6640625, + "rewards/chosen": -2.3382725715637207, + "rewards/margins": 0.6837398409843445, + "rewards/rejected": -3.02201247215271, + "step": 350 + }, + { + "epoch": 0.7421209117938553, + "grad_norm": 2.3685505390167236, + "learning_rate": 1.8784149413030004e-07, + "logits/chosen": -0.7785338759422302, + "logits/rejected": -0.7802280187606812, + "logps/chosen": -1.267012119293213, + "logps/rejected": -1.5235991477966309, + "loss": 1.177, + "rewards/accuracies": 0.65625, + "rewards/chosen": -2.534024238586426, + "rewards/margins": 0.5131738781929016, + "rewards/rejected": -3.0471982955932617, + "step": 351 + }, + { + "epoch": 0.7442352163858605, + "grad_norm": 2.8642280101776123, + "learning_rate": 1.849560782091445e-07, + "logits/chosen": -0.8269493579864502, + "logits/rejected": -0.8431333899497986, + "logps/chosen": -1.228893518447876, + "logps/rejected": -1.5784943103790283, + "loss": 1.1764, + "rewards/accuracies": 0.640625, + "rewards/chosen": -2.457787036895752, + "rewards/margins": 0.6992017030715942, + "rewards/rejected": -3.1569886207580566, + "step": 352 + }, + { + "epoch": 0.7463495209778659, + "grad_norm": 4.742166996002197, + "learning_rate": 1.8208795794843246e-07, + "logits/chosen": -0.764488160610199, + "logits/rejected": -0.7553139925003052, + "logps/chosen": -1.3095338344573975, + "logps/rejected": -1.6771752834320068, + "loss": 1.0957, + "rewards/accuracies": 0.7109375, + "rewards/chosen": -2.619067668914795, + "rewards/margins": 0.7352830171585083, + "rewards/rejected": -3.3543505668640137, + "step": 353 + }, + { + "epoch": 0.7484638255698711, + "grad_norm": 3.543769359588623, + "learning_rate": 1.7923729080568239e-07, + "logits/chosen": -0.7355642914772034, + "logits/rejected": -0.7744429707527161, + "logps/chosen": -1.3419017791748047, + "logps/rejected": -1.591749668121338, + "loss": 1.2579, + "rewards/accuracies": 0.6484375, + "rewards/chosen": -2.6838035583496094, + "rewards/margins": 0.4996955990791321, + "rewards/rejected": -3.183499336242676, + "step": 354 + }, + { + "epoch": 0.7505781301618765, + "grad_norm": 4.187947750091553, + "learning_rate": 1.764042332802506e-07, + "logits/chosen": -0.7009099721908569, + "logits/rejected": -0.6947562098503113, + "logps/chosen": -1.3167665004730225, + "logps/rejected": -1.640596866607666, + "loss": 1.2269, + "rewards/accuracies": 0.6171875, + "rewards/chosen": -2.633533000946045, + "rewards/margins": 0.6476608514785767, + "rewards/rejected": -3.281193733215332, + "step": 355 + }, + { + "epoch": 0.7526924347538817, + "grad_norm": 1.7813458442687988, + "learning_rate": 1.7358894090473924e-07, + "logits/chosen": -0.7276792526245117, + "logits/rejected": -0.7536065578460693, + "logps/chosen": -1.401429295539856, + "logps/rejected": -1.7458314895629883, + "loss": 1.1934, + "rewards/accuracies": 0.6875, + "rewards/chosen": -2.802858591079712, + "rewards/margins": 0.6888045072555542, + "rewards/rejected": -3.4916629791259766, + "step": 356 + }, + { + "epoch": 0.754806739345887, + "grad_norm": 2.3192296028137207, + "learning_rate": 1.7079156823645801e-07, + "logits/chosen": -0.6756848096847534, + "logits/rejected": -0.6988381743431091, + "logps/chosen": -1.36654531955719, + "logps/rejected": -1.6672351360321045, + "loss": 1.1928, + "rewards/accuracies": 0.671875, + "rewards/chosen": -2.73309063911438, + "rewards/margins": 0.6013798117637634, + "rewards/rejected": -3.334470272064209, + "step": 357 + }, + { + "epoch": 0.7569210439378923, + "grad_norm": 2.7722420692443848, + "learning_rate": 1.6801226884893893e-07, + "logits/chosen": -0.6857397556304932, + "logits/rejected": -0.7169467806816101, + "logps/chosen": -1.4047114849090576, + "logps/rejected": -1.733205795288086, + "loss": 1.16, + "rewards/accuracies": 0.6796875, + "rewards/chosen": -2.8094229698181152, + "rewards/margins": 0.6569885015487671, + "rewards/rejected": -3.466411590576172, + "step": 358 + }, + { + "epoch": 0.7590353485298976, + "grad_norm": 6.300495624542236, + "learning_rate": 1.6525119532350506e-07, + "logits/chosen": -0.7457281947135925, + "logits/rejected": -0.7319377660751343, + "logps/chosen": -1.282365083694458, + "logps/rejected": -1.6675825119018555, + "loss": 1.0742, + "rewards/accuracies": 0.7265625, + "rewards/chosen": -2.564730167388916, + "rewards/margins": 0.7704350352287292, + "rewards/rejected": -3.335165023803711, + "step": 359 + }, + { + "epoch": 0.7611496531219029, + "grad_norm": 3.5068228244781494, + "learning_rate": 1.6250849924089482e-07, + "logits/chosen": -0.7112680077552795, + "logits/rejected": -0.7166794538497925, + "logps/chosen": -1.3996254205703735, + "logps/rejected": -1.6635833978652954, + "loss": 1.2438, + "rewards/accuracies": 0.625, + "rewards/chosen": -2.799250841140747, + "rewards/margins": 0.5279159545898438, + "rewards/rejected": -3.327166795730591, + "step": 360 + }, + { + "epoch": 0.7632639577139082, + "grad_norm": 1.421538233757019, + "learning_rate": 1.5978433117293883e-07, + "logits/chosen": -0.7009663581848145, + "logits/rejected": -0.6878695487976074, + "logps/chosen": -1.4174691438674927, + "logps/rejected": -1.802457332611084, + "loss": 1.0885, + "rewards/accuracies": 0.7265625, + "rewards/chosen": -2.8349382877349854, + "rewards/margins": 0.7699761986732483, + "rewards/rejected": -3.604914665222168, + "step": 361 + }, + { + "epoch": 0.7653782623059134, + "grad_norm": 3.2645766735076904, + "learning_rate": 1.5707884067429471e-07, + "logits/chosen": -0.6865817904472351, + "logits/rejected": -0.7084690928459167, + "logps/chosen": -1.377517819404602, + "logps/rejected": -1.7079989910125732, + "loss": 1.2371, + "rewards/accuracies": 0.640625, + "rewards/chosen": -2.755035638809204, + "rewards/margins": 0.660962700843811, + "rewards/rejected": -3.4159979820251465, + "step": 362 + }, + { + "epoch": 0.7674925668979188, + "grad_norm": 1.973783254623413, + "learning_rate": 1.5439217627423695e-07, + "logits/chosen": -0.7317283153533936, + "logits/rejected": -0.7571225166320801, + "logps/chosen": -1.63040030002594, + "logps/rejected": -2.027442216873169, + "loss": 1.1614, + "rewards/accuracies": 0.671875, + "rewards/chosen": -3.26080060005188, + "rewards/margins": 0.7940834760665894, + "rewards/rejected": -4.054884433746338, + "step": 363 + }, + { + "epoch": 0.769606871489924, + "grad_norm": 4.545448303222656, + "learning_rate": 1.5172448546850163e-07, + "logits/chosen": -0.6746503710746765, + "logits/rejected": -0.7073549628257751, + "logps/chosen": -1.321073055267334, + "logps/rejected": -1.6741642951965332, + "loss": 1.1609, + "rewards/accuracies": 0.6796875, + "rewards/chosen": -2.642146110534668, + "rewards/margins": 0.7061826586723328, + "rewards/rejected": -3.3483285903930664, + "step": 364 + }, + { + "epoch": 0.7717211760819292, + "grad_norm": 8.678997039794922, + "learning_rate": 1.490759147111894e-07, + "logits/chosen": -0.6089351773262024, + "logits/rejected": -0.6172072291374207, + "logps/chosen": -1.6598318815231323, + "logps/rejected": -1.9151239395141602, + "loss": 1.2762, + "rewards/accuracies": 0.6484375, + "rewards/chosen": -3.3196637630462646, + "rewards/margins": 0.5105838775634766, + "rewards/rejected": -3.8302478790283203, + "step": 365 + }, + { + "epoch": 0.7738354806739346, + "grad_norm": 3.29367733001709, + "learning_rate": 1.4644660940672627e-07, + "logits/chosen": -0.6255152821540833, + "logits/rejected": -0.6178345680236816, + "logps/chosen": -1.7635339498519897, + "logps/rejected": -2.02409029006958, + "loss": 1.4469, + "rewards/accuracies": 0.6171875, + "rewards/chosen": -3.5270678997039795, + "rewards/margins": 0.5211121439933777, + "rewards/rejected": -4.04818058013916, + "step": 366 + }, + { + "epoch": 0.7759497852659398, + "grad_norm": 3.500715732574463, + "learning_rate": 1.438367139018796e-07, + "logits/chosen": -0.6738446354866028, + "logits/rejected": -0.671849250793457, + "logps/chosen": -1.603959560394287, + "logps/rejected": -2.140998363494873, + "loss": 0.9771, + "rewards/accuracies": 0.703125, + "rewards/chosen": -3.207919120788574, + "rewards/margins": 1.0740783214569092, + "rewards/rejected": -4.281996726989746, + "step": 367 + }, + { + "epoch": 0.7780640898579452, + "grad_norm": 2.8842501640319824, + "learning_rate": 1.412463714778343e-07, + "logits/chosen": -0.6544129252433777, + "logits/rejected": -0.6667245030403137, + "logps/chosen": -1.7409751415252686, + "logps/rejected": -2.1441538333892822, + "loss": 1.1043, + "rewards/accuracies": 0.7265625, + "rewards/chosen": -3.481950283050537, + "rewards/margins": 0.806357741355896, + "rewards/rejected": -4.2883076667785645, + "step": 368 + }, + { + "epoch": 0.7801783944499504, + "grad_norm": 3.7606077194213867, + "learning_rate": 1.3867572434232728e-07, + "logits/chosen": -0.6620441675186157, + "logits/rejected": -0.6536539793014526, + "logps/chosen": -1.6755543947219849, + "logps/rejected": -2.012425184249878, + "loss": 1.2249, + "rewards/accuracies": 0.6796875, + "rewards/chosen": -3.3511087894439697, + "rewards/margins": 0.6737421154975891, + "rewards/rejected": -4.024850368499756, + "step": 369 + }, + { + "epoch": 0.7822926990419558, + "grad_norm": 3.284456729888916, + "learning_rate": 1.3612491362183887e-07, + "logits/chosen": -0.6353476047515869, + "logits/rejected": -0.6363587975502014, + "logps/chosen": -1.6001538038253784, + "logps/rejected": -2.0670526027679443, + "loss": 1.0746, + "rewards/accuracies": 0.6796875, + "rewards/chosen": -3.200307607650757, + "rewards/margins": 0.9337971210479736, + "rewards/rejected": -4.134105205535889, + "step": 370 + }, + { + "epoch": 0.784407003633961, + "grad_norm": 1.9063444137573242, + "learning_rate": 1.3359407935384642e-07, + "logits/chosen": -0.6120063662528992, + "logits/rejected": -0.5794797539710999, + "logps/chosen": -1.4489734172821045, + "logps/rejected": -1.9216854572296143, + "loss": 1.0928, + "rewards/accuracies": 0.71875, + "rewards/chosen": -2.897946834564209, + "rewards/margins": 0.9454240798950195, + "rewards/rejected": -3.8433709144592285, + "step": 371 + }, + { + "epoch": 0.7865213082259663, + "grad_norm": 6.973724842071533, + "learning_rate": 1.3108336047913633e-07, + "logits/chosen": -0.6082984209060669, + "logits/rejected": -0.6162828207015991, + "logps/chosen": -1.7623229026794434, + "logps/rejected": -2.239130735397339, + "loss": 1.2665, + "rewards/accuracies": 0.59375, + "rewards/chosen": -3.5246458053588867, + "rewards/margins": 0.9536150693893433, + "rewards/rejected": -4.478261470794678, + "step": 372 + }, + { + "epoch": 0.7886356128179716, + "grad_norm": 3.874128580093384, + "learning_rate": 1.2859289483417557e-07, + "logits/chosen": -0.5540960431098938, + "logits/rejected": -0.5091680884361267, + "logps/chosen": -1.85587739944458, + "logps/rejected": -2.3959312438964844, + "loss": 1.0672, + "rewards/accuracies": 0.6796875, + "rewards/chosen": -3.71175479888916, + "rewards/margins": 1.0801074504852295, + "rewards/rejected": -4.791862487792969, + "step": 373 + }, + { + "epoch": 0.7907499174099769, + "grad_norm": 13.771154403686523, + "learning_rate": 1.261228191435445e-07, + "logits/chosen": -0.599963903427124, + "logits/rejected": -0.5765703916549683, + "logps/chosen": -1.7974251508712769, + "logps/rejected": -2.2272088527679443, + "loss": 1.1994, + "rewards/accuracies": 0.640625, + "rewards/chosen": -3.5948503017425537, + "rewards/margins": 0.8595672249794006, + "rewards/rejected": -4.454417705535889, + "step": 374 + }, + { + "epoch": 0.7928642220019821, + "grad_norm": 2.5084969997406006, + "learning_rate": 1.2367326901243214e-07, + "logits/chosen": -0.5945304036140442, + "logits/rejected": -0.6021737456321716, + "logps/chosen": -1.9855573177337646, + "logps/rejected": -2.3953022956848145, + "loss": 1.2576, + "rewards/accuracies": 0.6640625, + "rewards/chosen": -3.9711146354675293, + "rewards/margins": 0.8194906711578369, + "rewards/rejected": -4.790604591369629, + "step": 375 + }, + { + "epoch": 0.7949785265939875, + "grad_norm": 4.571497440338135, + "learning_rate": 1.2124437891918993e-07, + "logits/chosen": -0.5888144373893738, + "logits/rejected": -0.5575076937675476, + "logps/chosen": -1.8334908485412598, + "logps/rejected": -2.153212070465088, + "loss": 1.2104, + "rewards/accuracies": 0.640625, + "rewards/chosen": -3.6669816970825195, + "rewards/margins": 0.639442503452301, + "rewards/rejected": -4.306424140930176, + "step": 376 + }, + { + "epoch": 0.7970928311859927, + "grad_norm": 5.023235321044922, + "learning_rate": 1.1883628220795005e-07, + "logits/chosen": -0.632038414478302, + "logits/rejected": -0.6368271708488464, + "logps/chosen": -1.8573570251464844, + "logps/rejected": -2.291320323944092, + "loss": 1.1719, + "rewards/accuracies": 0.6640625, + "rewards/chosen": -3.7147140502929688, + "rewards/margins": 0.8679270148277283, + "rewards/rejected": -4.582640647888184, + "step": 377 + }, + { + "epoch": 0.7992071357779981, + "grad_norm": 4.98567533493042, + "learning_rate": 1.1644911108130434e-07, + "logits/chosen": -0.5647228360176086, + "logits/rejected": -0.5541558265686035, + "logps/chosen": -1.8232372999191284, + "logps/rejected": -2.2992348670959473, + "loss": 1.1476, + "rewards/accuracies": 0.6953125, + "rewards/chosen": -3.646474599838257, + "rewards/margins": 0.9519950747489929, + "rewards/rejected": -4.5984697341918945, + "step": 378 + }, + { + "epoch": 0.8013214403700033, + "grad_norm": 9.514540672302246, + "learning_rate": 1.1408299659304682e-07, + "logits/chosen": -0.5385195016860962, + "logits/rejected": -0.5475942492485046, + "logps/chosen": -2.077877998352051, + "logps/rejected": -2.4877052307128906, + "loss": 1.1605, + "rewards/accuracies": 0.7265625, + "rewards/chosen": -4.155755996704102, + "rewards/margins": 0.8196545243263245, + "rewards/rejected": -4.975410461425781, + "step": 379 + }, + { + "epoch": 0.8034357449620085, + "grad_norm": 7.652558326721191, + "learning_rate": 1.1173806864097884e-07, + "logits/chosen": -0.5651392936706543, + "logits/rejected": -0.5097556114196777, + "logps/chosen": -1.9452521800994873, + "logps/rejected": -2.376047134399414, + "loss": 1.2004, + "rewards/accuracies": 0.6796875, + "rewards/chosen": -3.8905043601989746, + "rewards/margins": 0.8615895509719849, + "rewards/rejected": -4.752094268798828, + "step": 380 + }, + { + "epoch": 0.8055500495540139, + "grad_norm": 6.184218406677246, + "learning_rate": 1.0941445595977766e-07, + "logits/chosen": -0.5738644599914551, + "logits/rejected": -0.570101797580719, + "logps/chosen": -2.0233359336853027, + "logps/rejected": -2.5829384326934814, + "loss": 1.1539, + "rewards/accuracies": 0.71875, + "rewards/chosen": -4.0466718673706055, + "rewards/margins": 1.1192048788070679, + "rewards/rejected": -5.165876865386963, + "step": 381 + }, + { + "epoch": 0.8076643541460191, + "grad_norm": 4.697547435760498, + "learning_rate": 1.0711228611392936e-07, + "logits/chosen": -0.5766915082931519, + "logits/rejected": -0.5619411468505859, + "logps/chosen": -2.0546395778656006, + "logps/rejected": -2.4459054470062256, + "loss": 1.2723, + "rewards/accuracies": 0.59375, + "rewards/chosen": -4.109279155731201, + "rewards/margins": 0.7825320959091187, + "rewards/rejected": -4.891810894012451, + "step": 382 + }, + { + "epoch": 0.8097786587380245, + "grad_norm": 5.595128536224365, + "learning_rate": 1.0483168549072518e-07, + "logits/chosen": -0.6808648109436035, + "logits/rejected": -0.6518751382827759, + "logps/chosen": -1.9909974336624146, + "logps/rejected": -2.3775596618652344, + "loss": 1.2501, + "rewards/accuracies": 0.6640625, + "rewards/chosen": -3.981994867324829, + "rewards/margins": 0.7731241583824158, + "rewards/rejected": -4.755119323730469, + "step": 383 + }, + { + "epoch": 0.8118929633300297, + "grad_norm": 3.6460607051849365, + "learning_rate": 1.0257277929332331e-07, + "logits/chosen": -0.6901826858520508, + "logits/rejected": -0.703309953212738, + "logps/chosen": -1.9317903518676758, + "logps/rejected": -2.322279930114746, + "loss": 1.1945, + "rewards/accuracies": 0.65625, + "rewards/chosen": -3.8635807037353516, + "rewards/margins": 0.780979335308075, + "rewards/rejected": -4.644559860229492, + "step": 384 + }, + { + "epoch": 0.814007267922035, + "grad_norm": 8.366463661193848, + "learning_rate": 1.0033569153387561e-07, + "logits/chosen": -0.5720599293708801, + "logits/rejected": -0.5666248798370361, + "logps/chosen": -1.9946173429489136, + "logps/rejected": -2.3951826095581055, + "loss": 1.3349, + "rewards/accuracies": 0.546875, + "rewards/chosen": -3.989234685897827, + "rewards/margins": 0.8011305332183838, + "rewards/rejected": -4.790365219116211, + "step": 385 + }, + { + "epoch": 0.8161215725140403, + "grad_norm": 1.8578377962112427, + "learning_rate": 9.812054502671834e-08, + "logits/chosen": -0.6122175455093384, + "logits/rejected": -0.5665942430496216, + "logps/chosen": -2.1414878368377686, + "logps/rejected": -2.646432399749756, + "loss": 1.1834, + "rewards/accuracies": 0.7109375, + "rewards/chosen": -4.282975673675537, + "rewards/margins": 1.009889006614685, + "rewards/rejected": -5.292864799499512, + "step": 386 + }, + { + "epoch": 0.8182358771060456, + "grad_norm": 4.7323408126831055, + "learning_rate": 9.592746138163093e-08, + "logits/chosen": -0.5390607118606567, + "logits/rejected": -0.5227072834968567, + "logps/chosen": -2.1249067783355713, + "logps/rejected": -2.688115119934082, + "loss": 1.2211, + "rewards/accuracies": 0.671875, + "rewards/chosen": -4.249813556671143, + "rewards/margins": 1.1264164447784424, + "rewards/rejected": -5.376230239868164, + "step": 387 + }, + { + "epoch": 0.8203501816980509, + "grad_norm": 2.5557284355163574, + "learning_rate": 9.375656099715934e-08, + "logits/chosen": -0.5654515027999878, + "logits/rejected": -0.5636597275733948, + "logps/chosen": -2.126107692718506, + "logps/rejected": -2.606684684753418, + "loss": 1.1995, + "rewards/accuracies": 0.6484375, + "rewards/chosen": -4.252215385437012, + "rewards/margins": 0.9611539244651794, + "rewards/rejected": -5.213369369506836, + "step": 388 + }, + { + "epoch": 0.8224644862900562, + "grad_norm": 4.177574634552002, + "learning_rate": 9.160796305400609e-08, + "logits/chosen": -0.6432445645332336, + "logits/rejected": -0.6587055921554565, + "logps/chosen": -2.0785441398620605, + "logps/rejected": -2.4507219791412354, + "loss": 1.2339, + "rewards/accuracies": 0.6328125, + "rewards/chosen": -4.157088279724121, + "rewards/margins": 0.7443561553955078, + "rewards/rejected": -4.901443958282471, + "step": 389 + }, + { + "epoch": 0.8245787908820614, + "grad_norm": 5.901131629943848, + "learning_rate": 8.9481785508487e-08, + "logits/chosen": -0.588135302066803, + "logits/rejected": -0.5850880742073059, + "logps/chosen": -2.225466251373291, + "logps/rejected": -2.638160467147827, + "loss": 1.2255, + "rewards/accuracies": 0.6328125, + "rewards/chosen": -4.450932502746582, + "rewards/margins": 0.8253881335258484, + "rewards/rejected": -5.276320934295654, + "step": 390 + }, + { + "epoch": 0.8266930954740668, + "grad_norm": 2.727555751800537, + "learning_rate": 8.737814508605673e-08, + "logits/chosen": -0.5863823294639587, + "logits/rejected": -0.590294599533081, + "logps/chosen": -1.9851064682006836, + "logps/rejected": -2.579831600189209, + "loss": 1.0113, + "rewards/accuracies": 0.6953125, + "rewards/chosen": -3.970212936401367, + "rewards/margins": 1.1894500255584717, + "rewards/rejected": -5.159663200378418, + "step": 391 + }, + { + "epoch": 0.828807400066072, + "grad_norm": 9.048048973083496, + "learning_rate": 8.529715727489912e-08, + "logits/chosen": -0.5600543022155762, + "logits/rejected": -0.5537065267562866, + "logps/chosen": -1.9846975803375244, + "logps/rejected": -2.2676990032196045, + "loss": 1.3045, + "rewards/accuracies": 0.6328125, + "rewards/chosen": -3.969395160675049, + "rewards/margins": 0.5660032629966736, + "rewards/rejected": -4.535398006439209, + "step": 392 + }, + { + "epoch": 0.8309217046580774, + "grad_norm": 3.4390201568603516, + "learning_rate": 8.323893631958806e-08, + "logits/chosen": -0.6335893273353577, + "logits/rejected": -0.6190727949142456, + "logps/chosen": -1.908363938331604, + "logps/rejected": -2.510305166244507, + "loss": 1.0262, + "rewards/accuracies": 0.6875, + "rewards/chosen": -3.816727876663208, + "rewards/margins": 1.2038825750350952, + "rewards/rejected": -5.020610332489014, + "step": 393 + }, + { + "epoch": 0.8330360092500826, + "grad_norm": 5.347372531890869, + "learning_rate": 8.120359521481501e-08, + "logits/chosen": -0.6408874988555908, + "logits/rejected": -0.643690288066864, + "logps/chosen": -2.019606828689575, + "logps/rejected": -2.3068103790283203, + "loss": 1.3028, + "rewards/accuracies": 0.640625, + "rewards/chosen": -4.03921365737915, + "rewards/margins": 0.574406623840332, + "rewards/rejected": -4.613620758056641, + "step": 394 + }, + { + "epoch": 0.8351503138420878, + "grad_norm": 2.2186920642852783, + "learning_rate": 7.9191245699186e-08, + "logits/chosen": -0.7156819105148315, + "logits/rejected": -0.6814436316490173, + "logps/chosen": -2.108549118041992, + "logps/rejected": -2.608646869659424, + "loss": 1.2948, + "rewards/accuracies": 0.6328125, + "rewards/chosen": -4.217098236083984, + "rewards/margins": 1.0001959800720215, + "rewards/rejected": -5.217293739318848, + "step": 395 + }, + { + "epoch": 0.8372646184340932, + "grad_norm": 2.6448726654052734, + "learning_rate": 7.720199824908691e-08, + "logits/chosen": -0.5753149390220642, + "logits/rejected": -0.6065633296966553, + "logps/chosen": -2.2337100505828857, + "logps/rejected": -2.6677160263061523, + "loss": 1.2273, + "rewards/accuracies": 0.625, + "rewards/chosen": -4.4674201011657715, + "rewards/margins": 0.868012011051178, + "rewards/rejected": -5.335432052612305, + "step": 396 + }, + { + "epoch": 0.8393789230260984, + "grad_norm": 6.596648216247559, + "learning_rate": 7.523596207261907e-08, + "logits/chosen": -0.5432775616645813, + "logits/rejected": -0.4928567409515381, + "logps/chosen": -2.1113924980163574, + "logps/rejected": -2.482846975326538, + "loss": 1.319, + "rewards/accuracies": 0.625, + "rewards/chosen": -4.222784996032715, + "rewards/margins": 0.7429092526435852, + "rewards/rejected": -4.965693950653076, + "step": 397 + }, + { + "epoch": 0.8414932276181037, + "grad_norm": 3.9646811485290527, + "learning_rate": 7.329324510360269e-08, + "logits/chosen": -0.5816119909286499, + "logits/rejected": -0.564030110836029, + "logps/chosen": -2.0296411514282227, + "logps/rejected": -2.5152456760406494, + "loss": 1.1645, + "rewards/accuracies": 0.703125, + "rewards/chosen": -4.059282302856445, + "rewards/margins": 0.9712092876434326, + "rewards/rejected": -5.030491352081299, + "step": 398 + }, + { + "epoch": 0.843607532210109, + "grad_norm": 2.7787463665008545, + "learning_rate": 7.137395399565249e-08, + "logits/chosen": -0.6342184543609619, + "logits/rejected": -0.6318203210830688, + "logps/chosen": -2.0209803581237793, + "logps/rejected": -2.5250658988952637, + "loss": 1.1822, + "rewards/accuracies": 0.671875, + "rewards/chosen": -4.041960716247559, + "rewards/margins": 1.0081708431243896, + "rewards/rejected": -5.050131797790527, + "step": 399 + }, + { + "epoch": 0.8457218368021143, + "grad_norm": 4.476524353027344, + "learning_rate": 6.947819411632222e-08, + "logits/chosen": -0.5809480547904968, + "logits/rejected": -0.5740150213241577, + "logps/chosen": -1.9072691202163696, + "logps/rejected": -2.2942898273468018, + "loss": 1.3214, + "rewards/accuracies": 0.6640625, + "rewards/chosen": -3.8145382404327393, + "rewards/margins": 0.7740417718887329, + "rewards/rejected": -4.5885796546936035, + "step": 400 + }, + { + "epoch": 0.8478361413941196, + "grad_norm": 2.47866153717041, + "learning_rate": 6.760606954131965e-08, + "logits/chosen": -0.5540263652801514, + "logits/rejected": -0.5378059148788452, + "logps/chosen": -1.8337305784225464, + "logps/rejected": -2.264974594116211, + "loss": 1.2396, + "rewards/accuracies": 0.671875, + "rewards/chosen": -3.6674611568450928, + "rewards/margins": 0.8624882698059082, + "rewards/rejected": -4.529949188232422, + "step": 401 + }, + { + "epoch": 0.8499504459861249, + "grad_norm": 2.800645112991333, + "learning_rate": 6.575768304879292e-08, + "logits/chosen": -0.6384072303771973, + "logits/rejected": -0.6310533285140991, + "logps/chosen": -1.9723026752471924, + "logps/rejected": -2.3342039585113525, + "loss": 1.2746, + "rewards/accuracies": 0.6171875, + "rewards/chosen": -3.9446053504943848, + "rewards/margins": 0.7238021492958069, + "rewards/rejected": -4.668407917022705, + "step": 402 + }, + { + "epoch": 0.8520647505781301, + "grad_norm": 2.794485092163086, + "learning_rate": 6.3933136113689e-08, + "logits/chosen": -0.7269207239151001, + "logits/rejected": -0.7003817558288574, + "logps/chosen": -1.8535553216934204, + "logps/rejected": -2.2630820274353027, + "loss": 1.1774, + "rewards/accuracies": 0.765625, + "rewards/chosen": -3.707110643386841, + "rewards/margins": 0.8190534114837646, + "rewards/rejected": -4.5261640548706055, + "step": 403 + }, + { + "epoch": 0.8541790551701355, + "grad_norm": 12.197257041931152, + "learning_rate": 6.213252890218162e-08, + "logits/chosen": -0.5296715497970581, + "logits/rejected": -0.5422269105911255, + "logps/chosen": -1.8217012882232666, + "logps/rejected": -2.3873071670532227, + "loss": 1.1467, + "rewards/accuracies": 0.6328125, + "rewards/chosen": -3.643402576446533, + "rewards/margins": 1.1312119960784912, + "rewards/rejected": -4.774614334106445, + "step": 404 + }, + { + "epoch": 0.8562933597621407, + "grad_norm": 2.396972179412842, + "learning_rate": 6.03559602661729e-08, + "logits/chosen": -0.6527739763259888, + "logits/rejected": -0.645389199256897, + "logps/chosen": -1.9720454216003418, + "logps/rejected": -2.2900233268737793, + "loss": 1.3423, + "rewards/accuracies": 0.671875, + "rewards/chosen": -3.9440908432006836, + "rewards/margins": 0.6359554529190063, + "rewards/rejected": -4.580046653747559, + "step": 405 + }, + { + "epoch": 0.8584076643541461, + "grad_norm": 3.5759809017181396, + "learning_rate": 5.8603527737866307e-08, + "logits/chosen": -0.5955278277397156, + "logits/rejected": -0.583007276058197, + "logps/chosen": -1.835761547088623, + "logps/rejected": -2.2889809608459473, + "loss": 1.1015, + "rewards/accuracies": 0.6484375, + "rewards/chosen": -3.671523094177246, + "rewards/margins": 0.9064393639564514, + "rewards/rejected": -4.5779619216918945, + "step": 406 + }, + { + "epoch": 0.8605219689461513, + "grad_norm": 8.514383316040039, + "learning_rate": 5.687532752441232e-08, + "logits/chosen": -0.6325979828834534, + "logits/rejected": -0.5895124077796936, + "logps/chosen": -2.0668628215789795, + "logps/rejected": -2.4919605255126953, + "loss": 1.2469, + "rewards/accuracies": 0.640625, + "rewards/chosen": -4.133725643157959, + "rewards/margins": 0.8501947522163391, + "rewards/rejected": -4.983921051025391, + "step": 407 + }, + { + "epoch": 0.8626362735381566, + "grad_norm": 2.7234861850738525, + "learning_rate": 5.517145450262639e-08, + "logits/chosen": -0.5355826616287231, + "logits/rejected": -0.5421631932258606, + "logps/chosen": -1.8649351596832275, + "logps/rejected": -2.5664312839508057, + "loss": 1.0119, + "rewards/accuracies": 0.7265625, + "rewards/chosen": -3.729870319366455, + "rewards/margins": 1.4029927253723145, + "rewards/rejected": -5.132862567901611, + "step": 408 + }, + { + "epoch": 0.8647505781301619, + "grad_norm": 3.1693661212921143, + "learning_rate": 5.3492002213780754e-08, + "logits/chosen": -0.5687247514724731, + "logits/rejected": -0.5579267740249634, + "logps/chosen": -2.0369410514831543, + "logps/rejected": -2.4640278816223145, + "loss": 1.311, + "rewards/accuracies": 0.640625, + "rewards/chosen": -4.073882102966309, + "rewards/margins": 0.8541740775108337, + "rewards/rejected": -4.928055763244629, + "step": 409 + }, + { + "epoch": 0.8668648827221671, + "grad_norm": 1.8922606706619263, + "learning_rate": 5.183706285846873e-08, + "logits/chosen": -0.6247987151145935, + "logits/rejected": -0.6043509244918823, + "logps/chosen": -1.8121845722198486, + "logps/rejected": -2.2492425441741943, + "loss": 1.1291, + "rewards/accuracies": 0.671875, + "rewards/chosen": -3.6243691444396973, + "rewards/margins": 0.8741158843040466, + "rewards/rejected": -4.498485088348389, + "step": 410 + }, + { + "epoch": 0.8689791873141725, + "grad_norm": 5.305470943450928, + "learning_rate": 5.020672729154307e-08, + "logits/chosen": -0.5554785132408142, + "logits/rejected": -0.565819501876831, + "logps/chosen": -1.9100950956344604, + "logps/rejected": -2.4060237407684326, + "loss": 1.1576, + "rewards/accuracies": 0.640625, + "rewards/chosen": -3.820190191268921, + "rewards/margins": 0.9918570518493652, + "rewards/rejected": -4.812047481536865, + "step": 411 + }, + { + "epoch": 0.8710934919061777, + "grad_norm": 3.2367563247680664, + "learning_rate": 4.860108501712823e-08, + "logits/chosen": -0.6536320447921753, + "logits/rejected": -0.6901589035987854, + "logps/chosen": -1.9213619232177734, + "logps/rejected": -2.270475387573242, + "loss": 1.2711, + "rewards/accuracies": 0.703125, + "rewards/chosen": -3.842723846435547, + "rewards/margins": 0.6982269287109375, + "rewards/rejected": -4.540950775146484, + "step": 412 + }, + { + "epoch": 0.873207796498183, + "grad_norm": 4.2919135093688965, + "learning_rate": 4.7020224183706715e-08, + "logits/chosen": -0.7220910787582397, + "logits/rejected": -0.7015137672424316, + "logps/chosen": -1.7745577096939087, + "logps/rejected": -2.2485008239746094, + "loss": 1.0773, + "rewards/accuracies": 0.7421875, + "rewards/chosen": -3.5491154193878174, + "rewards/margins": 0.9478861093521118, + "rewards/rejected": -4.497001647949219, + "step": 413 + }, + { + "epoch": 0.8753221010901883, + "grad_norm": 6.373754501342773, + "learning_rate": 4.54642315792792e-08, + "logits/chosen": -0.6177189946174622, + "logits/rejected": -0.6176800727844238, + "logps/chosen": -1.8406522274017334, + "logps/rejected": -2.368619441986084, + "loss": 1.1582, + "rewards/accuracies": 0.6640625, + "rewards/chosen": -3.681304454803467, + "rewards/margins": 1.0559337139129639, + "rewards/rejected": -4.737238883972168, + "step": 414 + }, + { + "epoch": 0.8774364056821936, + "grad_norm": 4.120994567871094, + "learning_rate": 4.3933192626600725e-08, + "logits/chosen": -0.5981518626213074, + "logits/rejected": -0.5846447348594666, + "logps/chosen": -1.9437062740325928, + "logps/rejected": -2.4175901412963867, + "loss": 1.1865, + "rewards/accuracies": 0.6875, + "rewards/chosen": -3.8874125480651855, + "rewards/margins": 0.9477680921554565, + "rewards/rejected": -4.835180282592773, + "step": 415 + }, + { + "epoch": 0.8795507102741988, + "grad_norm": 3.618441104888916, + "learning_rate": 4.242719137849077e-08, + "logits/chosen": -0.544365644454956, + "logits/rejected": -0.5385901927947998, + "logps/chosen": -1.8662028312683105, + "logps/rejected": -2.2550435066223145, + "loss": 1.2125, + "rewards/accuracies": 0.625, + "rewards/chosen": -3.732405662536621, + "rewards/margins": 0.7776816487312317, + "rewards/rejected": -4.510087013244629, + "step": 416 + }, + { + "epoch": 0.8816650148662042, + "grad_norm": 8.518675804138184, + "learning_rate": 4.0946310513218726e-08, + "logits/chosen": -0.6048115491867065, + "logits/rejected": -0.5681714415550232, + "logps/chosen": -2.020745038986206, + "logps/rejected": -2.5642106533050537, + "loss": 1.1682, + "rewards/accuracies": 0.7109375, + "rewards/chosen": -4.041490077972412, + "rewards/margins": 1.0869308710098267, + "rewards/rejected": -5.128421306610107, + "step": 417 + }, + { + "epoch": 0.8837793194582094, + "grad_norm": 4.693824768066406, + "learning_rate": 3.9490631329964554e-08, + "logits/chosen": -0.5653468370437622, + "logits/rejected": -0.5610933303833008, + "logps/chosen": -1.8477216958999634, + "logps/rejected": -2.280613660812378, + "loss": 1.2177, + "rewards/accuracies": 0.640625, + "rewards/chosen": -3.6954433917999268, + "rewards/margins": 0.8657836318016052, + "rewards/rejected": -4.561227321624756, + "step": 418 + }, + { + "epoch": 0.8858936240502148, + "grad_norm": 4.910251617431641, + "learning_rate": 3.806023374435663e-08, + "logits/chosen": -0.6456243991851807, + "logits/rejected": -0.6571968197822571, + "logps/chosen": -1.8414027690887451, + "logps/rejected": -2.2380261421203613, + "loss": 1.2081, + "rewards/accuracies": 0.6328125, + "rewards/chosen": -3.6828055381774902, + "rewards/margins": 0.7932465076446533, + "rewards/rejected": -4.476052284240723, + "step": 419 + }, + { + "epoch": 0.88800792864222, + "grad_norm": 2.260300636291504, + "learning_rate": 3.665519628408331e-08, + "logits/chosen": -0.6023683547973633, + "logits/rejected": -0.6400430798530579, + "logps/chosen": -2.039283275604248, + "logps/rejected": -2.520536184310913, + "loss": 1.1629, + "rewards/accuracies": 0.671875, + "rewards/chosen": -4.078566551208496, + "rewards/margins": 0.962505042552948, + "rewards/rejected": -5.041072368621826, + "step": 420 + }, + { + "epoch": 0.8901222332342253, + "grad_norm": 2.411315679550171, + "learning_rate": 3.527559608458225e-08, + "logits/chosen": -0.6408150792121887, + "logits/rejected": -0.6065229177474976, + "logps/chosen": -1.91830313205719, + "logps/rejected": -2.378871440887451, + "loss": 1.1848, + "rewards/accuracies": 0.6796875, + "rewards/chosen": -3.83660626411438, + "rewards/margins": 0.9211370944976807, + "rewards/rejected": -4.757742881774902, + "step": 421 + }, + { + "epoch": 0.8922365378262306, + "grad_norm": 8.43724250793457, + "learning_rate": 3.39215088848061e-08, + "logits/chosen": -0.5962439179420471, + "logits/rejected": -0.5975909233093262, + "logps/chosen": -1.9837861061096191, + "logps/rejected": -2.319769859313965, + "loss": 1.2026, + "rewards/accuracies": 0.6953125, + "rewards/chosen": -3.9675722122192383, + "rewards/margins": 0.6719677448272705, + "rewards/rejected": -4.63953971862793, + "step": 422 + }, + { + "epoch": 0.8943508424182359, + "grad_norm": 2.261178731918335, + "learning_rate": 3.259300902306367e-08, + "logits/chosen": -0.6858331561088562, + "logits/rejected": -0.7034648060798645, + "logps/chosen": -1.8496602773666382, + "logps/rejected": -2.3583877086639404, + "loss": 1.1137, + "rewards/accuracies": 0.734375, + "rewards/chosen": -3.6993205547332764, + "rewards/margins": 1.0174546241760254, + "rewards/rejected": -4.716775417327881, + "step": 423 + }, + { + "epoch": 0.8964651470102412, + "grad_norm": 7.621473789215088, + "learning_rate": 3.129016943293955e-08, + "logits/chosen": -0.6037753224372864, + "logits/rejected": -0.5865834355354309, + "logps/chosen": -1.902024507522583, + "logps/rejected": -2.3152518272399902, + "loss": 1.2577, + "rewards/accuracies": 0.6328125, + "rewards/chosen": -3.804049015045166, + "rewards/margins": 0.8264546394348145, + "rewards/rejected": -4.6305036544799805, + "step": 424 + }, + { + "epoch": 0.8985794516022465, + "grad_norm": 2.954953908920288, + "learning_rate": 3.001306163928985e-08, + "logits/chosen": -0.6682695746421814, + "logits/rejected": -0.6516857147216797, + "logps/chosen": -2.0923025608062744, + "logps/rejected": -2.4602210521698, + "loss": 1.3758, + "rewards/accuracies": 0.5859375, + "rewards/chosen": -4.184605121612549, + "rewards/margins": 0.7358372211456299, + "rewards/rejected": -4.9204421043396, + "step": 425 + }, + { + "epoch": 0.9006937561942517, + "grad_norm": 4.746059894561768, + "learning_rate": 2.8761755754315663e-08, + "logits/chosen": -0.6213058829307556, + "logits/rejected": -0.6071665287017822, + "logps/chosen": -1.9309402704238892, + "logps/rejected": -2.3048858642578125, + "loss": 1.2216, + "rewards/accuracies": 0.6328125, + "rewards/chosen": -3.8618805408477783, + "rewards/margins": 0.7478916049003601, + "rewards/rejected": -4.609771728515625, + "step": 426 + }, + { + "epoch": 0.902808060786257, + "grad_norm": 3.4567902088165283, + "learning_rate": 2.753632047371335e-08, + "logits/chosen": -0.5602300763130188, + "logits/rejected": -0.5994393825531006, + "logps/chosen": -2.0382192134857178, + "logps/rejected": -2.4620015621185303, + "loss": 1.1534, + "rewards/accuracies": 0.6875, + "rewards/chosen": -4.0764384269714355, + "rewards/margins": 0.8475649952888489, + "rewards/rejected": -4.9240031242370605, + "step": 427 + }, + { + "epoch": 0.9049223653782623, + "grad_norm": 8.650147438049316, + "learning_rate": 2.63368230729043e-08, + "logits/chosen": -0.6574521660804749, + "logits/rejected": -0.6474560499191284, + "logps/chosen": -2.01283860206604, + "logps/rejected": -2.3451762199401855, + "loss": 1.3337, + "rewards/accuracies": 0.625, + "rewards/chosen": -4.02567720413208, + "rewards/margins": 0.6646751165390015, + "rewards/rejected": -4.690352439880371, + "step": 428 + }, + { + "epoch": 0.9070366699702676, + "grad_norm": 4.965768337249756, + "learning_rate": 2.5163329403340593e-08, + "logits/chosen": -0.632398784160614, + "logits/rejected": -0.6226595640182495, + "logps/chosen": -1.9954252243041992, + "logps/rejected": -2.415121555328369, + "loss": 1.1249, + "rewards/accuracies": 0.6484375, + "rewards/chosen": -3.9908504486083984, + "rewards/margins": 0.8393926024436951, + "rewards/rejected": -4.830243110656738, + "step": 429 + }, + { + "epoch": 0.9091509745622729, + "grad_norm": 4.165818214416504, + "learning_rate": 2.4015903888890242e-08, + "logits/chosen": -0.6372086405754089, + "logits/rejected": -0.6573516130447388, + "logps/chosen": -1.9238042831420898, + "logps/rejected": -2.3672964572906494, + "loss": 1.1372, + "rewards/accuracies": 0.65625, + "rewards/chosen": -3.8476085662841797, + "rewards/margins": 0.8869843482971191, + "rewards/rejected": -4.734592914581299, + "step": 430 + }, + { + "epoch": 0.9112652791542781, + "grad_norm": 4.025818347930908, + "learning_rate": 2.289460952230038e-08, + "logits/chosen": -0.6017577648162842, + "logits/rejected": -0.5835919380187988, + "logps/chosen": -1.9263951778411865, + "logps/rejected": -2.364337921142578, + "loss": 1.1519, + "rewards/accuracies": 0.7265625, + "rewards/chosen": -3.852790355682373, + "rewards/margins": 0.875885009765625, + "rewards/rejected": -4.728675842285156, + "step": 431 + }, + { + "epoch": 0.9133795837462835, + "grad_norm": 2.232624053955078, + "learning_rate": 2.1799507861738788e-08, + "logits/chosen": -0.697775661945343, + "logits/rejected": -0.7254015803337097, + "logps/chosen": -1.8258415460586548, + "logps/rejected": -2.089477777481079, + "loss": 1.3136, + "rewards/accuracies": 0.578125, + "rewards/chosen": -3.6516830921173096, + "rewards/margins": 0.5272722244262695, + "rewards/rejected": -4.178955554962158, + "step": 432 + }, + { + "epoch": 0.9154938883382887, + "grad_norm": 5.815128326416016, + "learning_rate": 2.073065902741472e-08, + "logits/chosen": -0.5873744487762451, + "logits/rejected": -0.5638723969459534, + "logps/chosen": -1.9891620874404907, + "logps/rejected": -2.4962096214294434, + "loss": 1.1379, + "rewards/accuracies": 0.6953125, + "rewards/chosen": -3.9783241748809814, + "rewards/margins": 1.0140951871871948, + "rewards/rejected": -4.992419242858887, + "step": 433 + }, + { + "epoch": 0.917608192930294, + "grad_norm": 5.057411193847656, + "learning_rate": 1.9688121698277993e-08, + "logits/chosen": -0.607324481010437, + "logits/rejected": -0.5964059829711914, + "logps/chosen": -1.8643240928649902, + "logps/rejected": -2.2751855850219727, + "loss": 1.2388, + "rewards/accuracies": 0.625, + "rewards/chosen": -3.7286481857299805, + "rewards/margins": 0.8217229843139648, + "rewards/rejected": -4.550371170043945, + "step": 434 + }, + { + "epoch": 0.9197224975222993, + "grad_norm": 2.25390362739563, + "learning_rate": 1.8671953108797823e-08, + "logits/chosen": -0.6268022656440735, + "logits/rejected": -0.6332954168319702, + "logps/chosen": -1.945924997329712, + "logps/rejected": -2.330981731414795, + "loss": 1.1455, + "rewards/accuracies": 0.71875, + "rewards/chosen": -3.891849994659424, + "rewards/margins": 0.7701136469841003, + "rewards/rejected": -4.66196346282959, + "step": 435 + }, + { + "epoch": 0.9218368021143046, + "grad_norm": 3.9572856426239014, + "learning_rate": 1.7682209045820684e-08, + "logits/chosen": -0.6522207856178284, + "logits/rejected": -0.6930267810821533, + "logps/chosen": -1.8152984380722046, + "logps/rejected": -2.0800223350524902, + "loss": 1.2978, + "rewards/accuracies": 0.6875, + "rewards/chosen": -3.630596876144409, + "rewards/margins": 0.5294479727745056, + "rewards/rejected": -4.1600446701049805, + "step": 436 + }, + { + "epoch": 0.9239511067063099, + "grad_norm": 1.733438491821289, + "learning_rate": 1.671894384550743e-08, + "logits/chosen": -0.5977643728256226, + "logits/rejected": -0.5842040777206421, + "logps/chosen": -1.8794972896575928, + "logps/rejected": -2.413329601287842, + "loss": 1.0233, + "rewards/accuracies": 0.71875, + "rewards/chosen": -3.7589945793151855, + "rewards/margins": 1.0676649808883667, + "rewards/rejected": -4.826659202575684, + "step": 437 + }, + { + "epoch": 0.9260654112983152, + "grad_norm": 2.8760743141174316, + "learning_rate": 1.5782210390350713e-08, + "logits/chosen": -0.5813508033752441, + "logits/rejected": -0.5602753758430481, + "logps/chosen": -1.7892794609069824, + "logps/rejected": -2.32309627532959, + "loss": 1.0836, + "rewards/accuracies": 0.71875, + "rewards/chosen": -3.578558921813965, + "rewards/margins": 1.0676335096359253, + "rewards/rejected": -4.64619255065918, + "step": 438 + }, + { + "epoch": 0.9281797158903204, + "grad_norm": 5.760490894317627, + "learning_rate": 1.4872060106271179e-08, + "logits/chosen": -0.5673117637634277, + "logits/rejected": -0.5580011606216431, + "logps/chosen": -1.943117618560791, + "logps/rejected": -2.4581894874572754, + "loss": 1.1229, + "rewards/accuracies": 0.71875, + "rewards/chosen": -3.886235237121582, + "rewards/margins": 1.030144453048706, + "rewards/rejected": -4.916378974914551, + "step": 439 + }, + { + "epoch": 0.9302940204823258, + "grad_norm": 5.213393211364746, + "learning_rate": 1.3988542959794625e-08, + "logits/chosen": -0.5715171098709106, + "logits/rejected": -0.5791775584220886, + "logps/chosen": -1.961305022239685, + "logps/rejected": -2.4485957622528076, + "loss": 1.0877, + "rewards/accuracies": 0.7265625, + "rewards/chosen": -3.92261004447937, + "rewards/margins": 0.9745810627937317, + "rewards/rejected": -4.897191524505615, + "step": 440 + }, + { + "epoch": 0.932408325074331, + "grad_norm": 2.670029878616333, + "learning_rate": 1.3131707455309004e-08, + "logits/chosen": -0.6612206101417542, + "logits/rejected": -0.569149374961853, + "logps/chosen": -1.9947882890701294, + "logps/rejected": -2.41544771194458, + "loss": 1.2501, + "rewards/accuracies": 0.65625, + "rewards/chosen": -3.989576578140259, + "rewards/margins": 0.8413184881210327, + "rewards/rejected": -4.83089542388916, + "step": 441 + }, + { + "epoch": 0.9345226296663363, + "grad_norm": 2.0773093700408936, + "learning_rate": 1.230160063240121e-08, + "logits/chosen": -0.5475001335144043, + "logits/rejected": -0.6024526953697205, + "logps/chosen": -1.9972546100616455, + "logps/rejected": -2.2212231159210205, + "loss": 1.2857, + "rewards/accuracies": 0.6796875, + "rewards/chosen": -3.994509220123291, + "rewards/margins": 0.4479368031024933, + "rewards/rejected": -4.442446231842041, + "step": 442 + }, + { + "epoch": 0.9366369342583416, + "grad_norm": 2.6185569763183594, + "learning_rate": 1.1498268063274697e-08, + "logits/chosen": -0.6600778102874756, + "logits/rejected": -0.6794160604476929, + "logps/chosen": -1.7303975820541382, + "logps/rejected": -2.0589568614959717, + "loss": 1.183, + "rewards/accuracies": 0.6875, + "rewards/chosen": -3.4607951641082764, + "rewards/margins": 0.6571190357208252, + "rewards/rejected": -4.117913722991943, + "step": 443 + }, + { + "epoch": 0.9387512388503468, + "grad_norm": 2.7385923862457275, + "learning_rate": 1.0721753850247984e-08, + "logits/chosen": -0.6136504411697388, + "logits/rejected": -0.5926402807235718, + "logps/chosen": -1.9593303203582764, + "logps/rejected": -2.446382999420166, + "loss": 1.161, + "rewards/accuracies": 0.640625, + "rewards/chosen": -3.9186606407165527, + "rewards/margins": 0.9741055965423584, + "rewards/rejected": -4.892765998840332, + "step": 444 + }, + { + "epoch": 0.9408655434423522, + "grad_norm": 2.006077527999878, + "learning_rate": 9.972100623333035e-09, + "logits/chosen": -0.5911227464675903, + "logits/rejected": -0.5988056063652039, + "logps/chosen": -1.9767932891845703, + "logps/rejected": -2.307847499847412, + "loss": 1.2698, + "rewards/accuracies": 0.6953125, + "rewards/chosen": -3.9535865783691406, + "rewards/margins": 0.6621084809303284, + "rewards/rejected": -4.615694999694824, + "step": 445 + }, + { + "epoch": 0.9429798480343574, + "grad_norm": 3.775676965713501, + "learning_rate": 9.249349537894968e-09, + "logits/chosen": -0.5951496958732605, + "logits/rejected": -0.5602840185165405, + "logps/chosen": -2.01466965675354, + "logps/rejected": -2.404120922088623, + "loss": 1.3551, + "rewards/accuracies": 0.6328125, + "rewards/chosen": -4.02933931350708, + "rewards/margins": 0.7789022922515869, + "rewards/rejected": -4.808241844177246, + "step": 446 + }, + { + "epoch": 0.9450941526263628, + "grad_norm": 10.657898902893066, + "learning_rate": 8.553540272392967e-09, + "logits/chosen": -0.616013765335083, + "logits/rejected": -0.6068493127822876, + "logps/chosen": -1.9523563385009766, + "logps/rejected": -2.3371798992156982, + "loss": 1.2264, + "rewards/accuracies": 0.640625, + "rewards/chosen": -3.904712677001953, + "rewards/margins": 0.7696471810340881, + "rewards/rejected": -4.6743597984313965, + "step": 447 + }, + { + "epoch": 0.947208457218368, + "grad_norm": 5.239955902099609, + "learning_rate": 7.884711026201584e-09, + "logits/chosen": -0.5559091567993164, + "logits/rejected": -0.5499454140663147, + "logps/chosen": -1.9888339042663574, + "logps/rejected": -2.5645201206207275, + "loss": 1.1615, + "rewards/accuracies": 0.703125, + "rewards/chosen": -3.977667808532715, + "rewards/margins": 1.1513725519180298, + "rewards/rejected": -5.129040241241455, + "step": 448 + }, + { + "epoch": 0.9493227618103733, + "grad_norm": 4.970836162567139, + "learning_rate": 7.242898517513863e-09, + "logits/chosen": -0.6270098686218262, + "logits/rejected": -0.5990616083145142, + "logps/chosen": -2.0393564701080322, + "logps/rejected": -2.6450533866882324, + "loss": 1.0316, + "rewards/accuracies": 0.7421875, + "rewards/chosen": -4.0787129402160645, + "rewards/margins": 1.2113933563232422, + "rewards/rejected": -5.290106773376465, + "step": 449 + }, + { + "epoch": 0.9514370664023786, + "grad_norm": 4.401031494140625, + "learning_rate": 6.62813798132561e-09, + "logits/chosen": -0.6103833913803101, + "logits/rejected": -0.6355498433113098, + "logps/chosen": -1.9900306463241577, + "logps/rejected": -2.4799742698669434, + "loss": 1.1272, + "rewards/accuracies": 0.734375, + "rewards/chosen": -3.9800612926483154, + "rewards/margins": 0.979887843132019, + "rewards/rejected": -4.959948539733887, + "step": 450 + }, + { + "epoch": 0.9535513709943839, + "grad_norm": 5.162088871002197, + "learning_rate": 6.040463167500509e-09, + "logits/chosen": -0.6351377367973328, + "logits/rejected": -0.6445170044898987, + "logps/chosen": -2.017266035079956, + "logps/rejected": -2.4103317260742188, + "loss": 1.2591, + "rewards/accuracies": 0.6953125, + "rewards/chosen": -4.034532070159912, + "rewards/margins": 0.7861310243606567, + "rewards/rejected": -4.8206634521484375, + "step": 451 + }, + { + "epoch": 0.9556656755863892, + "grad_norm": 3.158773422241211, + "learning_rate": 5.4799063389179834e-09, + "logits/chosen": -0.6216992139816284, + "logits/rejected": -0.6317836046218872, + "logps/chosen": -1.9916179180145264, + "logps/rejected": -2.476783275604248, + "loss": 1.192, + "rewards/accuracies": 0.671875, + "rewards/chosen": -3.9832358360290527, + "rewards/margins": 0.970331072807312, + "rewards/rejected": -4.953566551208496, + "step": 452 + }, + { + "epoch": 0.9577799801783945, + "grad_norm": 4.7540435791015625, + "learning_rate": 4.946498269701616e-09, + "logits/chosen": -0.652457594871521, + "logits/rejected": -0.6148388385772705, + "logps/chosen": -2.0300891399383545, + "logps/rejected": -2.5610132217407227, + "loss": 1.0769, + "rewards/accuracies": 0.734375, + "rewards/chosen": -4.060178279876709, + "rewards/margins": 1.061848759651184, + "rewards/rejected": -5.122026443481445, + "step": 453 + }, + { + "epoch": 0.9598942847703997, + "grad_norm": 4.686556339263916, + "learning_rate": 4.440268243529666e-09, + "logits/chosen": -0.5588012337684631, + "logits/rejected": -0.5526341199874878, + "logps/chosen": -1.8666988611221313, + "logps/rejected": -2.3390815258026123, + "loss": 1.1768, + "rewards/accuracies": 0.6796875, + "rewards/chosen": -3.7333977222442627, + "rewards/margins": 0.9447645545005798, + "rewards/rejected": -4.678163051605225, + "step": 454 + }, + { + "epoch": 0.9620085893624051, + "grad_norm": 2.740269422531128, + "learning_rate": 3.961244052027413e-09, + "logits/chosen": -0.6438521146774292, + "logits/rejected": -0.6682748198509216, + "logps/chosen": -2.0076475143432617, + "logps/rejected": -2.388810396194458, + "loss": 1.2689, + "rewards/accuracies": 0.640625, + "rewards/chosen": -4.015295028686523, + "rewards/margins": 0.7623259425163269, + "rewards/rejected": -4.777620792388916, + "step": 455 + }, + { + "epoch": 0.9641228939544103, + "grad_norm": 2.9197144508361816, + "learning_rate": 3.509451993241541e-09, + "logits/chosen": -0.5822494029998779, + "logits/rejected": -0.5853508114814758, + "logps/chosen": -1.8848122358322144, + "logps/rejected": -2.4192898273468018, + "loss": 1.0924, + "rewards/accuracies": 0.71875, + "rewards/chosen": -3.7696244716644287, + "rewards/margins": 1.0689555406570435, + "rewards/rejected": -4.8385796546936035, + "step": 456 + }, + { + "epoch": 0.9662371985464155, + "grad_norm": 4.501737117767334, + "learning_rate": 3.084916870196297e-09, + "logits/chosen": -0.5652188658714294, + "logits/rejected": -0.5740686655044556, + "logps/chosen": -1.9216543436050415, + "logps/rejected": -2.23102068901062, + "loss": 1.2907, + "rewards/accuracies": 0.6328125, + "rewards/chosen": -3.843308687210083, + "rewards/margins": 0.618732750415802, + "rewards/rejected": -4.46204137802124, + "step": 457 + }, + { + "epoch": 0.9683515031384209, + "grad_norm": 3.512376070022583, + "learning_rate": 2.687661989531964e-09, + "logits/chosen": -0.6515664458274841, + "logits/rejected": -0.6550417542457581, + "logps/chosen": -1.9334843158721924, + "logps/rejected": -2.2688543796539307, + "loss": 1.2578, + "rewards/accuracies": 0.578125, + "rewards/chosen": -3.8669686317443848, + "rewards/margins": 0.67074054479599, + "rewards/rejected": -4.537708759307861, + "step": 458 + }, + { + "epoch": 0.9704658077304261, + "grad_norm": 2.165844678878784, + "learning_rate": 2.3177091602251675e-09, + "logits/chosen": -0.6218724250793457, + "logits/rejected": -0.5920112729072571, + "logps/chosen": -1.8584281206130981, + "logps/rejected": -2.366225242614746, + "loss": 1.1553, + "rewards/accuracies": 0.6875, + "rewards/chosen": -3.7168562412261963, + "rewards/margins": 1.0155941247940063, + "rewards/rejected": -4.732450485229492, + "step": 459 + }, + { + "epoch": 0.9725801123224315, + "grad_norm": 1.7227884531021118, + "learning_rate": 1.975078692391552e-09, + "logits/chosen": -0.5791985988616943, + "logits/rejected": -0.5785022974014282, + "logps/chosen": -1.8981022834777832, + "logps/rejected": -2.3716633319854736, + "loss": 1.1642, + "rewards/accuracies": 0.7265625, + "rewards/chosen": -3.7962045669555664, + "rewards/margins": 0.9471220970153809, + "rewards/rejected": -4.743326663970947, + "step": 460 + }, + { + "epoch": 0.9746944169144367, + "grad_norm": 3.150090217590332, + "learning_rate": 1.659789396171063e-09, + "logits/chosen": -0.6548072695732117, + "logits/rejected": -0.6290433406829834, + "logps/chosen": -2.0168204307556152, + "logps/rejected": -2.520479679107666, + "loss": 1.0736, + "rewards/accuracies": 0.765625, + "rewards/chosen": -4.0336408615112305, + "rewards/margins": 1.0073186159133911, + "rewards/rejected": -5.040959358215332, + "step": 461 + }, + { + "epoch": 0.976808721506442, + "grad_norm": 1.256157636642456, + "learning_rate": 1.37185858069494e-09, + "logits/chosen": -0.7094852328300476, + "logits/rejected": -0.7226460576057434, + "logps/chosen": -1.8896048069000244, + "logps/rejected": -2.4871973991394043, + "loss": 1.0536, + "rewards/accuracies": 0.6640625, + "rewards/chosen": -3.779209613800049, + "rewards/margins": 1.1951854228973389, + "rewards/rejected": -4.974394798278809, + "step": 462 + }, + { + "epoch": 0.9789230260984473, + "grad_norm": 2.8358895778656006, + "learning_rate": 1.1113020531357541e-09, + "logits/chosen": -0.6778469085693359, + "logits/rejected": -0.6957201957702637, + "logps/chosen": -2.0275380611419678, + "logps/rejected": -2.470618963241577, + "loss": 1.1801, + "rewards/accuracies": 0.6640625, + "rewards/chosen": -4.0550761222839355, + "rewards/margins": 0.886161208152771, + "rewards/rejected": -4.941237926483154, + "step": 463 + }, + { + "epoch": 0.9810373306904526, + "grad_norm": 2.8881914615631104, + "learning_rate": 8.781341178393242e-10, + "logits/chosen": -0.5639821887016296, + "logits/rejected": -0.5891467928886414, + "logps/chosen": -2.0047199726104736, + "logps/rejected": -2.522782802581787, + "loss": 1.1948, + "rewards/accuracies": 0.640625, + "rewards/chosen": -4.009439945220947, + "rewards/margins": 1.0361257791519165, + "rewards/rejected": -5.045565605163574, + "step": 464 + }, + { + "epoch": 0.9831516352824579, + "grad_norm": 3.421194314956665, + "learning_rate": 6.723675755396229e-10, + "logits/chosen": -0.540326714515686, + "logits/rejected": -0.5159227252006531, + "logps/chosen": -1.88228178024292, + "logps/rejected": -2.2003138065338135, + "loss": 1.2191, + "rewards/accuracies": 0.59375, + "rewards/chosen": -3.76456356048584, + "rewards/margins": 0.6360642313957214, + "rewards/rejected": -4.400627613067627, + "step": 465 + }, + { + "epoch": 0.9852659398744632, + "grad_norm": 4.243066310882568, + "learning_rate": 4.940137226560615e-10, + "logits/chosen": -0.6175463795661926, + "logits/rejected": -0.6400432586669922, + "logps/chosen": -1.9547748565673828, + "logps/rejected": -2.4598965644836426, + "loss": 1.2589, + "rewards/accuracies": 0.640625, + "rewards/chosen": -3.9095497131347656, + "rewards/margins": 1.0102434158325195, + "rewards/rejected": -4.919793128967285, + "step": 466 + }, + { + "epoch": 0.9873802444664684, + "grad_norm": 3.3425028324127197, + "learning_rate": 3.430823506730962e-10, + "logits/chosen": -0.5236034393310547, + "logits/rejected": -0.48699086904525757, + "logps/chosen": -2.167372465133667, + "logps/rejected": -2.687620162963867, + "loss": 1.2024, + "rewards/accuracies": 0.6796875, + "rewards/chosen": -4.334744930267334, + "rewards/margins": 1.0404952764511108, + "rewards/rejected": -5.375240325927734, + "step": 467 + }, + { + "epoch": 0.9894945490584738, + "grad_norm": 3.1803112030029297, + "learning_rate": 2.1958174560282594e-10, + "logits/chosen": -0.6515716910362244, + "logits/rejected": -0.6526726484298706, + "logps/chosen": -2.0350496768951416, + "logps/rejected": -2.4857177734375, + "loss": 1.1524, + "rewards/accuracies": 0.6484375, + "rewards/chosen": -4.070099353790283, + "rewards/margins": 0.9013361930847168, + "rewards/rejected": -4.971435546875, + "step": 468 + }, + { + "epoch": 0.991608853650479, + "grad_norm": 2.8402769565582275, + "learning_rate": 1.2351868753018858e-10, + "logits/chosen": -0.5555111765861511, + "logits/rejected": -0.5084383487701416, + "logps/chosen": -1.9741497039794922, + "logps/rejected": -2.5360653400421143, + "loss": 1.0956, + "rewards/accuracies": 0.703125, + "rewards/chosen": -3.9482994079589844, + "rewards/margins": 1.1238315105438232, + "rewards/rejected": -5.0721306800842285, + "step": 469 + }, + { + "epoch": 0.9937231582424844, + "grad_norm": 14.110418319702148, + "learning_rate": 5.4898450240536964e-11, + "logits/chosen": -0.6210866570472717, + "logits/rejected": -0.614806056022644, + "logps/chosen": -2.0763094425201416, + "logps/rejected": -2.5026116371154785, + "loss": 1.2184, + "rewards/accuracies": 0.7265625, + "rewards/chosen": -4.152618885040283, + "rewards/margins": 0.8526046276092529, + "rewards/rejected": -5.005223274230957, + "step": 470 + }, + { + "epoch": 0.9958374628344896, + "grad_norm": 2.8393566608428955, + "learning_rate": 1.3724800930314805e-11, + "logits/chosen": -0.5895847678184509, + "logits/rejected": -0.6269129514694214, + "logps/chosen": -1.8787530660629272, + "logps/rejected": -2.4467647075653076, + "loss": 1.0714, + "rewards/accuracies": 0.7265625, + "rewards/chosen": -3.7575061321258545, + "rewards/margins": 1.1360235214233398, + "rewards/rejected": -4.893529415130615, + "step": 471 + }, + { + "epoch": 0.9979517674264948, + "grad_norm": 3.9959075450897217, + "learning_rate": 0.0, + "logits/chosen": -0.6461910009384155, + "logits/rejected": -0.6503991484642029, + "logps/chosen": -1.798724889755249, + "logps/rejected": -2.3589823246002197, + "loss": 1.0133, + "rewards/accuracies": 0.7421875, + "rewards/chosen": -3.597449779510498, + "rewards/margins": 1.1205153465270996, + "rewards/rejected": -4.7179646492004395, + "step": 472 + } + ], + "logging_steps": 1, + "max_steps": 472, + "num_input_tokens_seen": 0, + "num_train_epochs": 1, + "save_steps": 64, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": true + }, + "attributes": {} + } + }, + "total_flos": 0.0, + "train_batch_size": 2, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-64/config.json b/checkpoint-64/config.json new file mode 100644 index 0000000000000000000000000000000000000000..ead276a9e0bbd4a0baae360715d8e80853974eb6 --- /dev/null +++ b/checkpoint-64/config.json @@ -0,0 +1,40 @@ +{ + "_name_or_path": "/home/v-liuzichuan/huggingface/Llama-3.2-3B-Instruct", + "architectures": [ + "LlamaForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "bos_token_id": 128000, + "eos_token_id": [ + 128001, + 128008, + 128009 + ], + "head_dim": 128, + "hidden_act": "silu", + "hidden_size": 3072, + "initializer_range": 0.02, + "intermediate_size": 8192, + "max_position_embeddings": 131072, + "mlp_bias": false, + "model_type": "llama", + "num_attention_heads": 24, + "num_hidden_layers": 28, + "num_key_value_heads": 8, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": { + "factor": 32.0, + "high_freq_factor": 4.0, + "low_freq_factor": 1.0, + "original_max_position_embeddings": 8192, + "rope_type": "llama3" + }, + "rope_theta": 500000.0, + "tie_word_embeddings": true, + "torch_dtype": "bfloat16", + "transformers_version": "4.45.2", + "use_cache": false, + "vocab_size": 128256 +} diff --git a/checkpoint-64/generation_config.json b/checkpoint-64/generation_config.json new file mode 100644 index 0000000000000000000000000000000000000000..b18d16b4cc3c799dd78e854fd824ddf8a2996285 --- /dev/null +++ b/checkpoint-64/generation_config.json @@ -0,0 +1,12 @@ +{ + "bos_token_id": 128000, + "do_sample": true, + "eos_token_id": [ + 128001, + 128008, + 128009 + ], + "temperature": 0.6, + "top_p": 0.9, + "transformers_version": "4.45.2" +} diff --git a/checkpoint-64/model-00001-of-00002.safetensors b/checkpoint-64/model-00001-of-00002.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..07ceff3f626491a3a531d254af869eb3c41f3e18 --- /dev/null +++ b/checkpoint-64/model-00001-of-00002.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6940dfe7ccb6e431c577a5649abf1e8627f247b0d7217c8426f824f20858b5cb +size 4965799096 diff --git a/checkpoint-64/model-00002-of-00002.safetensors b/checkpoint-64/model-00002-of-00002.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..57c75f87e318fb2da589d84d8fc7223fa910f666 --- /dev/null +++ b/checkpoint-64/model-00002-of-00002.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d3af0b4375ce36d8a2873f7dcabdfd3b2d1a21dcf1457c1f45f98e4ff15a56a6 +size 2247734992 diff --git a/checkpoint-64/model.safetensors.index.json b/checkpoint-64/model.safetensors.index.json new file mode 100644 index 0000000000000000000000000000000000000000..ed64de846d720b9a7859dc20575fea8e8ca51940 --- /dev/null +++ b/checkpoint-64/model.safetensors.index.json @@ -0,0 +1,262 @@ +{ + "metadata": { + "total_size": 7213504512 + }, + "weight_map": { + "lm_head.weight": "model-00002-of-00002.safetensors", + "model.embed_tokens.weight": "model-00001-of-00002.safetensors", + "model.layers.0.input_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.0.mlp.down_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.0.mlp.gate_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.0.mlp.up_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.0.post_attention_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.0.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.0.self_attn.o_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.0.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.0.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.1.input_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.1.mlp.down_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.1.mlp.gate_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.1.mlp.up_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.1.post_attention_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.1.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.1.self_attn.o_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.1.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.1.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.10.input_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.10.mlp.down_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.10.mlp.gate_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.10.mlp.up_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.10.post_attention_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.10.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.10.self_attn.o_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.10.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.10.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.11.input_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.11.mlp.down_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.11.mlp.gate_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.11.mlp.up_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.11.post_attention_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.11.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.11.self_attn.o_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.11.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.11.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.12.input_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.12.mlp.down_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.12.mlp.gate_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.12.mlp.up_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.12.post_attention_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.12.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.12.self_attn.o_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.12.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.12.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.13.input_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.13.mlp.down_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.13.mlp.gate_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.13.mlp.up_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.13.post_attention_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.13.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.13.self_attn.o_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.13.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.13.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.14.input_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.14.mlp.down_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.14.mlp.gate_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.14.mlp.up_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.14.post_attention_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.14.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.14.self_attn.o_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.14.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.14.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.15.input_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.15.mlp.down_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.15.mlp.gate_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.15.mlp.up_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.15.post_attention_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.15.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.15.self_attn.o_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.15.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.15.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.16.input_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.16.mlp.down_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.16.mlp.gate_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.16.mlp.up_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.16.post_attention_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.16.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.16.self_attn.o_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.16.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.16.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.17.input_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.17.mlp.down_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.17.mlp.gate_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.17.mlp.up_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.17.post_attention_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.17.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.17.self_attn.o_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.17.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.17.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.18.input_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.18.mlp.down_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.18.mlp.gate_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.18.mlp.up_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.18.post_attention_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.18.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.18.self_attn.o_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.18.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.18.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.19.input_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.19.mlp.down_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.19.mlp.gate_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.19.mlp.up_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.19.post_attention_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.19.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.19.self_attn.o_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.19.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.19.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.2.input_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.2.mlp.down_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.2.mlp.gate_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.2.mlp.up_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.2.post_attention_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.2.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.2.self_attn.o_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.2.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.2.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.20.input_layernorm.weight": "model-00002-of-00002.safetensors", + "model.layers.20.mlp.down_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.20.mlp.gate_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.20.mlp.up_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.20.post_attention_layernorm.weight": "model-00002-of-00002.safetensors", + "model.layers.20.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.20.self_attn.o_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.20.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.20.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.21.input_layernorm.weight": "model-00002-of-00002.safetensors", + "model.layers.21.mlp.down_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.21.mlp.gate_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.21.mlp.up_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.21.post_attention_layernorm.weight": "model-00002-of-00002.safetensors", + "model.layers.21.self_attn.k_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.21.self_attn.o_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.21.self_attn.q_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.21.self_attn.v_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.22.input_layernorm.weight": "model-00002-of-00002.safetensors", + "model.layers.22.mlp.down_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.22.mlp.gate_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.22.mlp.up_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.22.post_attention_layernorm.weight": "model-00002-of-00002.safetensors", + "model.layers.22.self_attn.k_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.22.self_attn.o_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.22.self_attn.q_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.22.self_attn.v_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.23.input_layernorm.weight": "model-00002-of-00002.safetensors", + "model.layers.23.mlp.down_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.23.mlp.gate_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.23.mlp.up_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.23.post_attention_layernorm.weight": "model-00002-of-00002.safetensors", + "model.layers.23.self_attn.k_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.23.self_attn.o_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.23.self_attn.q_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.23.self_attn.v_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.24.input_layernorm.weight": "model-00002-of-00002.safetensors", + "model.layers.24.mlp.down_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.24.mlp.gate_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.24.mlp.up_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.24.post_attention_layernorm.weight": "model-00002-of-00002.safetensors", + "model.layers.24.self_attn.k_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.24.self_attn.o_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.24.self_attn.q_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.24.self_attn.v_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.25.input_layernorm.weight": "model-00002-of-00002.safetensors", + "model.layers.25.mlp.down_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.25.mlp.gate_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.25.mlp.up_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.25.post_attention_layernorm.weight": "model-00002-of-00002.safetensors", + "model.layers.25.self_attn.k_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.25.self_attn.o_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.25.self_attn.q_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.25.self_attn.v_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.26.input_layernorm.weight": "model-00002-of-00002.safetensors", + "model.layers.26.mlp.down_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.26.mlp.gate_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.26.mlp.up_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.26.post_attention_layernorm.weight": "model-00002-of-00002.safetensors", + "model.layers.26.self_attn.k_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.26.self_attn.o_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.26.self_attn.q_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.26.self_attn.v_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.27.input_layernorm.weight": "model-00002-of-00002.safetensors", + "model.layers.27.mlp.down_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.27.mlp.gate_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.27.mlp.up_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.27.post_attention_layernorm.weight": "model-00002-of-00002.safetensors", + "model.layers.27.self_attn.k_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.27.self_attn.o_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.27.self_attn.q_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.27.self_attn.v_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.3.input_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.3.mlp.down_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.3.mlp.gate_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.3.mlp.up_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.3.post_attention_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.3.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.3.self_attn.o_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.3.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.3.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.4.input_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.4.mlp.down_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.4.mlp.gate_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.4.mlp.up_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.4.post_attention_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.4.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.4.self_attn.o_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.4.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.4.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.5.input_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.5.mlp.down_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.5.mlp.gate_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.5.mlp.up_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.5.post_attention_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.5.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.5.self_attn.o_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.5.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.5.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.6.input_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.6.mlp.down_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.6.mlp.gate_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.6.mlp.up_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.6.post_attention_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.6.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.6.self_attn.o_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.6.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.6.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.7.input_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.7.mlp.down_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.7.mlp.gate_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.7.mlp.up_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.7.post_attention_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.7.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.7.self_attn.o_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.7.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.7.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.8.input_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.8.mlp.down_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.8.mlp.gate_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.8.mlp.up_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.8.post_attention_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.8.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.8.self_attn.o_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.8.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.8.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.9.input_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.9.mlp.down_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.9.mlp.gate_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.9.mlp.up_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.9.post_attention_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.9.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.9.self_attn.o_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.9.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.9.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.norm.weight": "model-00002-of-00002.safetensors" + } +} diff --git a/checkpoint-64/special_tokens_map.json b/checkpoint-64/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..b43be96621d147110fb8a18b5776ec6e38516127 --- /dev/null +++ b/checkpoint-64/special_tokens_map.json @@ -0,0 +1,17 @@ +{ + "bos_token": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": "<|eot_id|>" +} diff --git a/checkpoint-64/tokenizer.json b/checkpoint-64/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..1c1d8d5c9024994f1d3b00f9662b8dd89ca13cf2 --- /dev/null +++ b/checkpoint-64/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6b9e4e7fb171f92fd137b777cc2714bf87d11576700a1dcd7a399e7bbe39537b +size 17209920 diff --git a/checkpoint-64/tokenizer_config.json b/checkpoint-64/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..e3cdb7bfcc59fd293867d98e124a30b3ddf39c93 --- /dev/null +++ b/checkpoint-64/tokenizer_config.json @@ -0,0 +1,2063 @@ +{ + "added_tokens_decoder": { + "128000": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128001": { + "content": "<|end_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128002": { + "content": "<|reserved_special_token_0|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128003": { + "content": "<|reserved_special_token_1|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128004": { + "content": "<|finetune_right_pad_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128005": { + "content": "<|reserved_special_token_2|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128006": { + "content": "<|start_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128007": { + "content": "<|end_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128008": { + "content": "<|eom_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128009": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128010": { + "content": "<|python_tag|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128011": { + "content": "<|reserved_special_token_3|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128012": { + "content": "<|reserved_special_token_4|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128013": { + "content": "<|reserved_special_token_5|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128014": { + "content": "<|reserved_special_token_6|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128015": { + "content": "<|reserved_special_token_7|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128016": { + "content": "<|reserved_special_token_8|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128017": { + "content": "<|reserved_special_token_9|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128018": { + "content": "<|reserved_special_token_10|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128019": { + "content": "<|reserved_special_token_11|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128020": { + "content": "<|reserved_special_token_12|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128021": { + "content": "<|reserved_special_token_13|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128022": { + "content": "<|reserved_special_token_14|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128023": { + "content": "<|reserved_special_token_15|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128024": { + "content": "<|reserved_special_token_16|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128025": { + "content": "<|reserved_special_token_17|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128026": { + "content": "<|reserved_special_token_18|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128027": { + "content": "<|reserved_special_token_19|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128028": { + "content": "<|reserved_special_token_20|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128029": { + "content": "<|reserved_special_token_21|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128030": { + "content": "<|reserved_special_token_22|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128031": { + "content": "<|reserved_special_token_23|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128032": { + "content": "<|reserved_special_token_24|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128033": { + "content": "<|reserved_special_token_25|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128034": { + "content": "<|reserved_special_token_26|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128035": { + "content": "<|reserved_special_token_27|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128036": { + "content": "<|reserved_special_token_28|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128037": { + "content": "<|reserved_special_token_29|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128038": { + "content": "<|reserved_special_token_30|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128039": { + "content": "<|reserved_special_token_31|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128040": { + "content": "<|reserved_special_token_32|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128041": { + "content": "<|reserved_special_token_33|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128042": { + "content": "<|reserved_special_token_34|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128043": { + "content": "<|reserved_special_token_35|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128044": { + "content": "<|reserved_special_token_36|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128045": { + "content": "<|reserved_special_token_37|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128046": { + "content": "<|reserved_special_token_38|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128047": { + "content": "<|reserved_special_token_39|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128048": { + "content": "<|reserved_special_token_40|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128049": { + "content": "<|reserved_special_token_41|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128050": { + "content": "<|reserved_special_token_42|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128051": { + "content": "<|reserved_special_token_43|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128052": { + "content": "<|reserved_special_token_44|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128053": { + "content": "<|reserved_special_token_45|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128054": { + "content": "<|reserved_special_token_46|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128055": { + "content": "<|reserved_special_token_47|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128056": { + "content": "<|reserved_special_token_48|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128057": { + "content": "<|reserved_special_token_49|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128058": { + "content": "<|reserved_special_token_50|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128059": { + "content": "<|reserved_special_token_51|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128060": { + "content": "<|reserved_special_token_52|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128061": { + "content": "<|reserved_special_token_53|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128062": { + "content": "<|reserved_special_token_54|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128063": { + "content": "<|reserved_special_token_55|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128064": { + "content": "<|reserved_special_token_56|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128065": { + "content": "<|reserved_special_token_57|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128066": { + "content": "<|reserved_special_token_58|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128067": { + "content": "<|reserved_special_token_59|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128068": { + "content": "<|reserved_special_token_60|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128069": { + "content": "<|reserved_special_token_61|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128070": { + "content": "<|reserved_special_token_62|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128071": { + "content": "<|reserved_special_token_63|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128072": { + "content": "<|reserved_special_token_64|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128073": { + "content": "<|reserved_special_token_65|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128074": { + "content": "<|reserved_special_token_66|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128075": { + "content": "<|reserved_special_token_67|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128076": { + "content": "<|reserved_special_token_68|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128077": { + "content": "<|reserved_special_token_69|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128078": { + "content": "<|reserved_special_token_70|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128079": { + "content": "<|reserved_special_token_71|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128080": { + "content": "<|reserved_special_token_72|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128081": { + "content": "<|reserved_special_token_73|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128082": { + "content": "<|reserved_special_token_74|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128083": { + "content": "<|reserved_special_token_75|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128084": { + "content": "<|reserved_special_token_76|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128085": { + "content": "<|reserved_special_token_77|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128086": { + "content": "<|reserved_special_token_78|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128087": { + "content": "<|reserved_special_token_79|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128088": { + "content": "<|reserved_special_token_80|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128089": { + "content": "<|reserved_special_token_81|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128090": { + "content": "<|reserved_special_token_82|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128091": { + "content": "<|reserved_special_token_83|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128092": { + "content": "<|reserved_special_token_84|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128093": { + "content": "<|reserved_special_token_85|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128094": { + "content": "<|reserved_special_token_86|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128095": { + "content": "<|reserved_special_token_87|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128096": { + "content": "<|reserved_special_token_88|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128097": { + "content": "<|reserved_special_token_89|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128098": { + "content": "<|reserved_special_token_90|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128099": { + "content": "<|reserved_special_token_91|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128100": { + "content": "<|reserved_special_token_92|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128101": { + "content": "<|reserved_special_token_93|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128102": { + "content": "<|reserved_special_token_94|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128103": { + "content": "<|reserved_special_token_95|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128104": { + "content": "<|reserved_special_token_96|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128105": { + "content": "<|reserved_special_token_97|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128106": { + "content": "<|reserved_special_token_98|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128107": { + "content": "<|reserved_special_token_99|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128108": { + "content": "<|reserved_special_token_100|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128109": { + "content": "<|reserved_special_token_101|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128110": { + "content": "<|reserved_special_token_102|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128111": { + "content": "<|reserved_special_token_103|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128112": { + "content": "<|reserved_special_token_104|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128113": { + "content": "<|reserved_special_token_105|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128114": { + "content": "<|reserved_special_token_106|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128115": { + "content": "<|reserved_special_token_107|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128116": { + "content": "<|reserved_special_token_108|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128117": { + "content": "<|reserved_special_token_109|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128118": { + "content": "<|reserved_special_token_110|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128119": { + "content": "<|reserved_special_token_111|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128120": { + "content": "<|reserved_special_token_112|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128121": { + "content": "<|reserved_special_token_113|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128122": { + "content": "<|reserved_special_token_114|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128123": { + "content": "<|reserved_special_token_115|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128124": { + "content": "<|reserved_special_token_116|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128125": { + "content": "<|reserved_special_token_117|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128126": { + "content": "<|reserved_special_token_118|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128127": { + "content": "<|reserved_special_token_119|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128128": { + "content": "<|reserved_special_token_120|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128129": { + "content": "<|reserved_special_token_121|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128130": { + "content": "<|reserved_special_token_122|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128131": { + "content": "<|reserved_special_token_123|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128132": { + "content": "<|reserved_special_token_124|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128133": { + "content": "<|reserved_special_token_125|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128134": { + "content": "<|reserved_special_token_126|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128135": { + "content": "<|reserved_special_token_127|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128136": { + "content": "<|reserved_special_token_128|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128137": { + "content": "<|reserved_special_token_129|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128138": { + "content": "<|reserved_special_token_130|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128139": { + "content": "<|reserved_special_token_131|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128140": { + "content": "<|reserved_special_token_132|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128141": { + "content": "<|reserved_special_token_133|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128142": { + "content": "<|reserved_special_token_134|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128143": { + "content": "<|reserved_special_token_135|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128144": { + "content": "<|reserved_special_token_136|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128145": { + "content": "<|reserved_special_token_137|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128146": { + "content": "<|reserved_special_token_138|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128147": { + "content": "<|reserved_special_token_139|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128148": { + "content": "<|reserved_special_token_140|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128149": { + "content": "<|reserved_special_token_141|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128150": { + "content": "<|reserved_special_token_142|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128151": { + "content": "<|reserved_special_token_143|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128152": { + "content": "<|reserved_special_token_144|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128153": { + "content": "<|reserved_special_token_145|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128154": { + "content": "<|reserved_special_token_146|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128155": { + "content": "<|reserved_special_token_147|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128156": { + "content": "<|reserved_special_token_148|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128157": { + "content": "<|reserved_special_token_149|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128158": { + "content": "<|reserved_special_token_150|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128159": { + "content": "<|reserved_special_token_151|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128160": { + "content": "<|reserved_special_token_152|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128161": { + "content": "<|reserved_special_token_153|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128162": { + "content": "<|reserved_special_token_154|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128163": { + "content": "<|reserved_special_token_155|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128164": { + "content": "<|reserved_special_token_156|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128165": { + "content": "<|reserved_special_token_157|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128166": { + "content": "<|reserved_special_token_158|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128167": { + "content": "<|reserved_special_token_159|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128168": { + "content": "<|reserved_special_token_160|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128169": { + "content": "<|reserved_special_token_161|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128170": { + "content": "<|reserved_special_token_162|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128171": { + "content": "<|reserved_special_token_163|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128172": { + "content": "<|reserved_special_token_164|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128173": { + "content": "<|reserved_special_token_165|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128174": { + "content": "<|reserved_special_token_166|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128175": { + "content": "<|reserved_special_token_167|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128176": { + "content": "<|reserved_special_token_168|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128177": { + "content": "<|reserved_special_token_169|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128178": { + "content": "<|reserved_special_token_170|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128179": { + "content": "<|reserved_special_token_171|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128180": { + "content": "<|reserved_special_token_172|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128181": { + "content": "<|reserved_special_token_173|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128182": { + "content": "<|reserved_special_token_174|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128183": { + "content": "<|reserved_special_token_175|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128184": { + "content": "<|reserved_special_token_176|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128185": { + "content": "<|reserved_special_token_177|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128186": { + "content": "<|reserved_special_token_178|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128187": { + "content": "<|reserved_special_token_179|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128188": { + "content": "<|reserved_special_token_180|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128189": { + "content": "<|reserved_special_token_181|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128190": { + "content": "<|reserved_special_token_182|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128191": { + "content": "<|reserved_special_token_183|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128192": { + "content": "<|reserved_special_token_184|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128193": { + "content": "<|reserved_special_token_185|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128194": { + "content": "<|reserved_special_token_186|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128195": { + "content": "<|reserved_special_token_187|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128196": { + "content": "<|reserved_special_token_188|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128197": { + "content": "<|reserved_special_token_189|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128198": { + "content": "<|reserved_special_token_190|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128199": { + "content": "<|reserved_special_token_191|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128200": { + "content": "<|reserved_special_token_192|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128201": { + "content": "<|reserved_special_token_193|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128202": { + "content": "<|reserved_special_token_194|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128203": { + "content": "<|reserved_special_token_195|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128204": { + "content": "<|reserved_special_token_196|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128205": { + "content": "<|reserved_special_token_197|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128206": { + "content": "<|reserved_special_token_198|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128207": { + "content": "<|reserved_special_token_199|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128208": { + "content": "<|reserved_special_token_200|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128209": { + "content": "<|reserved_special_token_201|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128210": { + "content": "<|reserved_special_token_202|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128211": { + "content": "<|reserved_special_token_203|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128212": { + "content": "<|reserved_special_token_204|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128213": { + "content": "<|reserved_special_token_205|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128214": { + "content": "<|reserved_special_token_206|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128215": { + "content": "<|reserved_special_token_207|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128216": { + "content": "<|reserved_special_token_208|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128217": { + "content": "<|reserved_special_token_209|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128218": { + "content": "<|reserved_special_token_210|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128219": { + "content": "<|reserved_special_token_211|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128220": { + "content": "<|reserved_special_token_212|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128221": { + "content": "<|reserved_special_token_213|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128222": { + "content": "<|reserved_special_token_214|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128223": { + "content": "<|reserved_special_token_215|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128224": { + "content": "<|reserved_special_token_216|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128225": { + "content": "<|reserved_special_token_217|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128226": { + "content": "<|reserved_special_token_218|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128227": { + "content": "<|reserved_special_token_219|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128228": { + "content": "<|reserved_special_token_220|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128229": { + "content": "<|reserved_special_token_221|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128230": { + "content": "<|reserved_special_token_222|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128231": { + "content": "<|reserved_special_token_223|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128232": { + "content": "<|reserved_special_token_224|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128233": { + "content": "<|reserved_special_token_225|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128234": { + "content": "<|reserved_special_token_226|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128235": { + "content": "<|reserved_special_token_227|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128236": { + "content": "<|reserved_special_token_228|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128237": { + "content": "<|reserved_special_token_229|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128238": { + "content": "<|reserved_special_token_230|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128239": { + "content": "<|reserved_special_token_231|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128240": { + "content": "<|reserved_special_token_232|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128241": { + "content": "<|reserved_special_token_233|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128242": { + "content": "<|reserved_special_token_234|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128243": { + "content": "<|reserved_special_token_235|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128244": { + "content": "<|reserved_special_token_236|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128245": { + "content": "<|reserved_special_token_237|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128246": { + "content": "<|reserved_special_token_238|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128247": { + "content": "<|reserved_special_token_239|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128248": { + "content": "<|reserved_special_token_240|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128249": { + "content": "<|reserved_special_token_241|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128250": { + "content": "<|reserved_special_token_242|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128251": { + "content": "<|reserved_special_token_243|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128252": { + "content": "<|reserved_special_token_244|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128253": { + "content": "<|reserved_special_token_245|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128254": { + "content": "<|reserved_special_token_246|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128255": { + "content": "<|reserved_special_token_247|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + } + }, + "bos_token": "<|begin_of_text|>", + "chat_template": "{{- bos_token }}\n{%- if custom_tools is defined %}\n {%- set tools = custom_tools %}\n{%- endif %}\n{%- if not tools_in_user_message is defined %}\n {%- set tools_in_user_message = true %}\n{%- endif %}\n{%- if not date_string is defined %}\n {%- if strftime_now is defined %}\n {%- set date_string = strftime_now(\"%d %b %Y\") %}\n {%- else %}\n {%- set date_string = \"26 Jul 2024\" %}\n {%- endif %}\n{%- endif %}\n{%- if not tools is defined %}\n {%- set tools = none %}\n{%- endif %}\n\n{#- This block extracts the system message, so we can slot it into the right place. #}\n{%- if messages[0]['role'] == 'system' %}\n {%- set system_message = messages[0]['content']|trim %}\n {%- set messages = messages[1:] %}\n{%- else %}\n {%- set system_message = \"\" %}\n{%- endif %}\n\n{#- System message #}\n{{- \"<|start_header_id|>system<|end_header_id|>\\n\\n\" }}\n{%- if tools is not none %}\n {{- \"Environment: ipython\\n\" }}\n{%- endif %}\n{{- \"Cutting Knowledge Date: December 2023\\n\" }}\n{{- \"Today Date: \" + date_string + \"\\n\\n\" }}\n{%- if tools is not none and not tools_in_user_message %}\n {{- \"You have access to the following functions. To call a function, please respond with JSON for a function call.\" }}\n {{- 'Respond in the format {\"name\": function name, \"parameters\": dictionary of argument name and its value}.' }}\n {{- \"Do not use variables.\\n\\n\" }}\n {%- for t in tools %}\n {{- t | tojson(indent=4) }}\n {{- \"\\n\\n\" }}\n {%- endfor %}\n{%- endif %}\n{{- system_message }}\n{{- \"<|eot_id|>\" }}\n\n{#- Custom tools are passed in a user message with some extra guidance #}\n{%- if tools_in_user_message and not tools is none %}\n {#- Extract the first user message so we can plug it in here #}\n {%- if messages | length != 0 %}\n {%- set first_user_message = messages[0]['content']|trim %}\n {%- set messages = messages[1:] %}\n {%- else %}\n {{- raise_exception(\"Cannot put tools in the first user message when there's no first user message!\") }}\n{%- endif %}\n {{- '<|start_header_id|>user<|end_header_id|>\\n\\n' -}}\n {{- \"Given the following functions, please respond with a JSON for a function call \" }}\n {{- \"with its proper arguments that best answers the given prompt.\\n\\n\" }}\n {{- 'Respond in the format {\"name\": function name, \"parameters\": dictionary of argument name and its value}.' }}\n {{- \"Do not use variables.\\n\\n\" }}\n {%- for t in tools %}\n {{- t | tojson(indent=4) }}\n {{- \"\\n\\n\" }}\n {%- endfor %}\n {{- first_user_message + \"<|eot_id|>\"}}\n{%- endif %}\n\n{%- for message in messages %}\n {%- if not (message.role == 'ipython' or message.role == 'tool' or 'tool_calls' in message) %}\n {{- '<|start_header_id|>' + message['role'] + '<|end_header_id|>\\n\\n'+ message['content'] | trim + '<|eot_id|>' }}\n {%- elif 'tool_calls' in message %}\n {%- if not message.tool_calls|length == 1 %}\n {{- raise_exception(\"This model only supports single tool-calls at once!\") }}\n {%- endif %}\n {%- set tool_call = message.tool_calls[0].function %}\n {{- '<|start_header_id|>assistant<|end_header_id|>\\n\\n' -}}\n {{- '{\"name\": \"' + tool_call.name + '\", ' }}\n {{- '\"parameters\": ' }}\n {{- tool_call.arguments | tojson }}\n {{- \"}\" }}\n {{- \"<|eot_id|>\" }}\n {%- elif message.role == \"tool\" or message.role == \"ipython\" %}\n {{- \"<|start_header_id|>ipython<|end_header_id|>\\n\\n\" }}\n {%- if message.content is mapping or message.content is iterable %}\n {{- message.content | tojson }}\n {%- else %}\n {{- message.content }}\n {%- endif %}\n {{- \"<|eot_id|>\" }}\n {%- endif %}\n{%- endfor %}\n{%- if add_generation_prompt %}\n {{- '<|start_header_id|>assistant<|end_header_id|>\\n\\n' }}\n{%- endif %}\n", + "clean_up_tokenization_spaces": true, + "eos_token": "<|eot_id|>", + "model_input_names": [ + "input_ids", + "attention_mask" + ], + "model_max_length": 2048, + "pad_token": "<|eot_id|>", + "tokenizer_class": "PreTrainedTokenizerFast" +} diff --git a/checkpoint-64/trainer_state.json b/checkpoint-64/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..4c5b4c719ed4ac1804b0c0d221d93f37664314b0 --- /dev/null +++ b/checkpoint-64/trainer_state.json @@ -0,0 +1,993 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 0.1353154938883383, + "eval_steps": 500, + "global_step": 64, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.002114304592005286, + "grad_norm": 1.5018059015274048, + "learning_rate": 2.083333333333333e-08, + "logits/chosen": -0.3466828167438507, + "logits/rejected": -0.30099987983703613, + "logps/chosen": -0.9345186948776245, + "logps/rejected": -0.9117153882980347, + "loss": 1.4889, + "rewards/accuracies": 0.53125, + "rewards/chosen": -1.869037389755249, + "rewards/margins": -0.04560665041208267, + "rewards/rejected": -1.8234307765960693, + "step": 1 + }, + { + "epoch": 0.004228609184010572, + "grad_norm": 0.8093975186347961, + "learning_rate": 4.166666666666666e-08, + "logits/chosen": -0.4310421049594879, + "logits/rejected": -0.39132067561149597, + "logps/chosen": -0.8198825716972351, + "logps/rejected": -0.8644211888313293, + "loss": 1.376, + "rewards/accuracies": 0.515625, + "rewards/chosen": -1.6397651433944702, + "rewards/margins": 0.08907715976238251, + "rewards/rejected": -1.7288423776626587, + "step": 2 + }, + { + "epoch": 0.006342913776015857, + "grad_norm": 0.5377389788627625, + "learning_rate": 6.25e-08, + "logits/chosen": -0.46692028641700745, + "logits/rejected": -0.4649256467819214, + "logps/chosen": -0.9087910652160645, + "logps/rejected": -0.9648240804672241, + "loss": 1.3404, + "rewards/accuracies": 0.5703125, + "rewards/chosen": -1.817582130432129, + "rewards/margins": 0.11206617206335068, + "rewards/rejected": -1.9296481609344482, + "step": 3 + }, + { + "epoch": 0.008457218368021144, + "grad_norm": 0.3221875727176666, + "learning_rate": 8.333333333333333e-08, + "logits/chosen": -0.416828453540802, + "logits/rejected": -0.3584724962711334, + "logps/chosen": -0.7818898558616638, + "logps/rejected": -0.8170815110206604, + "loss": 1.3806, + "rewards/accuracies": 0.484375, + "rewards/chosen": -1.5637797117233276, + "rewards/margins": 0.07038339227437973, + "rewards/rejected": -1.6341630220413208, + "step": 4 + }, + { + "epoch": 0.010571522960026428, + "grad_norm": 0.64655601978302, + "learning_rate": 1.0416666666666667e-07, + "logits/chosen": -0.376886248588562, + "logits/rejected": -0.3516141474246979, + "logps/chosen": -0.8814125061035156, + "logps/rejected": -1.0214396715164185, + "loss": 1.2741, + "rewards/accuracies": 0.625, + "rewards/chosen": -1.7628250122070312, + "rewards/margins": 0.28005433082580566, + "rewards/rejected": -2.042879343032837, + "step": 5 + }, + { + "epoch": 0.012685827552031714, + "grad_norm": 0.4775894582271576, + "learning_rate": 1.25e-07, + "logits/chosen": -0.4757865369319916, + "logits/rejected": -0.4498941898345947, + "logps/chosen": -0.8962199687957764, + "logps/rejected": -0.9462199807167053, + "loss": 1.364, + "rewards/accuracies": 0.5625, + "rewards/chosen": -1.7924399375915527, + "rewards/margins": 0.10000008344650269, + "rewards/rejected": -1.8924399614334106, + "step": 6 + }, + { + "epoch": 0.014800132144037, + "grad_norm": 1.2459568977355957, + "learning_rate": 1.4583333333333335e-07, + "logits/chosen": -0.38895344734191895, + "logits/rejected": -0.38165366649627686, + "logps/chosen": -0.9025766253471375, + "logps/rejected": -0.9465017318725586, + "loss": 1.3898, + "rewards/accuracies": 0.59375, + "rewards/chosen": -1.805153250694275, + "rewards/margins": 0.0878501906991005, + "rewards/rejected": -1.8930034637451172, + "step": 7 + }, + { + "epoch": 0.016914436736042288, + "grad_norm": 0.6195729374885559, + "learning_rate": 1.6666666666666665e-07, + "logits/chosen": -0.3964853286743164, + "logits/rejected": -0.377862811088562, + "logps/chosen": -0.9054160118103027, + "logps/rejected": -0.9605879187583923, + "loss": 1.3821, + "rewards/accuracies": 0.4765625, + "rewards/chosen": -1.8108320236206055, + "rewards/margins": 0.1103438138961792, + "rewards/rejected": -1.9211758375167847, + "step": 8 + }, + { + "epoch": 0.019028741328047574, + "grad_norm": 1.2074137926101685, + "learning_rate": 1.875e-07, + "logits/chosen": -0.3729037344455719, + "logits/rejected": -0.38143450021743774, + "logps/chosen": -0.9328653216362, + "logps/rejected": -0.9905799627304077, + "loss": 1.3754, + "rewards/accuracies": 0.5078125, + "rewards/chosen": -1.8657306432724, + "rewards/margins": 0.11542946100234985, + "rewards/rejected": -1.9811599254608154, + "step": 9 + }, + { + "epoch": 0.021143045920052856, + "grad_norm": 0.2867220640182495, + "learning_rate": 2.0833333333333333e-07, + "logits/chosen": -0.4263336658477783, + "logits/rejected": -0.42903271317481995, + "logps/chosen": -0.8979260325431824, + "logps/rejected": -0.9078099727630615, + "loss": 1.4438, + "rewards/accuracies": 0.515625, + "rewards/chosen": -1.7958520650863647, + "rewards/margins": 0.019767940044403076, + "rewards/rejected": -1.815619945526123, + "step": 10 + }, + { + "epoch": 0.023257350512058142, + "grad_norm": 0.8363026976585388, + "learning_rate": 2.2916666666666663e-07, + "logits/chosen": -0.3374914526939392, + "logits/rejected": -0.32399696111679077, + "logps/chosen": -0.8886098861694336, + "logps/rejected": -0.9484556317329407, + "loss": 1.3422, + "rewards/accuracies": 0.546875, + "rewards/chosen": -1.7772197723388672, + "rewards/margins": 0.11969132721424103, + "rewards/rejected": -1.8969112634658813, + "step": 11 + }, + { + "epoch": 0.025371655104063428, + "grad_norm": 0.5406804084777832, + "learning_rate": 2.5e-07, + "logits/chosen": -0.42844679951667786, + "logits/rejected": -0.37984615564346313, + "logps/chosen": -0.861629843711853, + "logps/rejected": -0.8968492150306702, + "loss": 1.3922, + "rewards/accuracies": 0.5, + "rewards/chosen": -1.723259687423706, + "rewards/margins": 0.07043875753879547, + "rewards/rejected": -1.7936984300613403, + "step": 12 + }, + { + "epoch": 0.027485959696068714, + "grad_norm": 0.9919329285621643, + "learning_rate": 2.708333333333333e-07, + "logits/chosen": -0.36495402455329895, + "logits/rejected": -0.3249490261077881, + "logps/chosen": -0.8502095937728882, + "logps/rejected": -0.8470643758773804, + "loss": 1.4334, + "rewards/accuracies": 0.5078125, + "rewards/chosen": -1.7004191875457764, + "rewards/margins": -0.006290358491241932, + "rewards/rejected": -1.6941287517547607, + "step": 13 + }, + { + "epoch": 0.029600264288074, + "grad_norm": 0.5477162003517151, + "learning_rate": 2.916666666666667e-07, + "logits/chosen": -0.4155704081058502, + "logits/rejected": -0.39535820484161377, + "logps/chosen": -1.0430240631103516, + "logps/rejected": -1.1318373680114746, + "loss": 1.3533, + "rewards/accuracies": 0.6015625, + "rewards/chosen": -2.086048126220703, + "rewards/margins": 0.17762640118598938, + "rewards/rejected": -2.263674736022949, + "step": 14 + }, + { + "epoch": 0.031714568880079286, + "grad_norm": 0.26530712842941284, + "learning_rate": 3.1249999999999997e-07, + "logits/chosen": -0.4810572564601898, + "logits/rejected": -0.42454615235328674, + "logps/chosen": -0.8741041421890259, + "logps/rejected": -0.9494178295135498, + "loss": 1.3655, + "rewards/accuracies": 0.5, + "rewards/chosen": -1.7482082843780518, + "rewards/margins": 0.15062758326530457, + "rewards/rejected": -1.8988356590270996, + "step": 15 + }, + { + "epoch": 0.033828873472084575, + "grad_norm": 0.9272629618644714, + "learning_rate": 3.333333333333333e-07, + "logits/chosen": -0.4440098702907562, + "logits/rejected": -0.3930297791957855, + "logps/chosen": -0.8473359942436218, + "logps/rejected": -0.9369213581085205, + "loss": 1.3248, + "rewards/accuracies": 0.5625, + "rewards/chosen": -1.6946719884872437, + "rewards/margins": 0.17917080223560333, + "rewards/rejected": -1.873842716217041, + "step": 16 + }, + { + "epoch": 0.03594317806408986, + "grad_norm": 0.5912418961524963, + "learning_rate": 3.541666666666667e-07, + "logits/chosen": -0.3838099539279938, + "logits/rejected": -0.3507584035396576, + "logps/chosen": -0.8888350129127502, + "logps/rejected": -0.9361770749092102, + "loss": 1.383, + "rewards/accuracies": 0.4921875, + "rewards/chosen": -1.7776700258255005, + "rewards/margins": 0.0946839451789856, + "rewards/rejected": -1.8723541498184204, + "step": 17 + }, + { + "epoch": 0.03805748265609515, + "grad_norm": 0.6536504030227661, + "learning_rate": 3.75e-07, + "logits/chosen": -0.3581697940826416, + "logits/rejected": -0.3620460629463196, + "logps/chosen": -0.8519617319107056, + "logps/rejected": -0.9022184610366821, + "loss": 1.3841, + "rewards/accuracies": 0.5078125, + "rewards/chosen": -1.7039234638214111, + "rewards/margins": 0.10051343590021133, + "rewards/rejected": -1.8044369220733643, + "step": 18 + }, + { + "epoch": 0.04017178724810043, + "grad_norm": 0.3433632552623749, + "learning_rate": 3.958333333333333e-07, + "logits/chosen": -0.37887442111968994, + "logits/rejected": -0.37543320655822754, + "logps/chosen": -0.9464104175567627, + "logps/rejected": -1.0017329454421997, + "loss": 1.3649, + "rewards/accuracies": 0.5703125, + "rewards/chosen": -1.8928208351135254, + "rewards/margins": 0.11064518243074417, + "rewards/rejected": -2.0034658908843994, + "step": 19 + }, + { + "epoch": 0.04228609184010571, + "grad_norm": 0.9764007329940796, + "learning_rate": 4.1666666666666667e-07, + "logits/chosen": -0.44110679626464844, + "logits/rejected": -0.4280649721622467, + "logps/chosen": -0.9046768546104431, + "logps/rejected": -1.0464633703231812, + "loss": 1.2592, + "rewards/accuracies": 0.625, + "rewards/chosen": -1.8093537092208862, + "rewards/margins": 0.2835729420185089, + "rewards/rejected": -2.0929267406463623, + "step": 20 + }, + { + "epoch": 0.044400396432111, + "grad_norm": 1.8563830852508545, + "learning_rate": 4.375e-07, + "logits/chosen": -0.45183491706848145, + "logits/rejected": -0.42935287952423096, + "logps/chosen": -0.9043138027191162, + "logps/rejected": -0.9462392926216125, + "loss": 1.3784, + "rewards/accuracies": 0.5625, + "rewards/chosen": -1.8086276054382324, + "rewards/margins": 0.08385094255208969, + "rewards/rejected": -1.892478585243225, + "step": 21 + }, + { + "epoch": 0.046514701024116284, + "grad_norm": 1.3473299741744995, + "learning_rate": 4.5833333333333327e-07, + "logits/chosen": -0.37855517864227295, + "logits/rejected": -0.34429043531417847, + "logps/chosen": -0.9284683465957642, + "logps/rejected": -0.9454050064086914, + "loss": 1.4346, + "rewards/accuracies": 0.5234375, + "rewards/chosen": -1.8569366931915283, + "rewards/margins": 0.03387312963604927, + "rewards/rejected": -1.8908100128173828, + "step": 22 + }, + { + "epoch": 0.04862900561612157, + "grad_norm": 0.940831184387207, + "learning_rate": 4.791666666666667e-07, + "logits/chosen": -0.39172160625457764, + "logits/rejected": -0.3695780634880066, + "logps/chosen": -0.9314202666282654, + "logps/rejected": -1.020229697227478, + "loss": 1.3322, + "rewards/accuracies": 0.5703125, + "rewards/chosen": -1.8628405332565308, + "rewards/margins": 0.17761869728565216, + "rewards/rejected": -2.040459394454956, + "step": 23 + }, + { + "epoch": 0.050743310208126856, + "grad_norm": 0.5783158540725708, + "learning_rate": 5e-07, + "logits/chosen": -0.4958629608154297, + "logits/rejected": -0.4257377088069916, + "logps/chosen": -0.9379237294197083, + "logps/rejected": -0.9415461421012878, + "loss": 1.441, + "rewards/accuracies": 0.515625, + "rewards/chosen": -1.8758474588394165, + "rewards/margins": 0.0072449808940291405, + "rewards/rejected": -1.8830922842025757, + "step": 24 + }, + { + "epoch": 0.052857614800132145, + "grad_norm": 1.4209853410720825, + "learning_rate": 5.208333333333334e-07, + "logits/chosen": -0.36407172679901123, + "logits/rejected": -0.3331725299358368, + "logps/chosen": -0.9192589521408081, + "logps/rejected": -0.9595308899879456, + "loss": 1.3994, + "rewards/accuracies": 0.5078125, + "rewards/chosen": -1.8385179042816162, + "rewards/margins": 0.080544114112854, + "rewards/rejected": -1.9190617799758911, + "step": 25 + }, + { + "epoch": 0.05497191939213743, + "grad_norm": 0.6310216188430786, + "learning_rate": 5.416666666666666e-07, + "logits/chosen": -0.41772690415382385, + "logits/rejected": -0.36565953493118286, + "logps/chosen": -0.8052878379821777, + "logps/rejected": -0.8673746585845947, + "loss": 1.3356, + "rewards/accuracies": 0.5859375, + "rewards/chosen": -1.6105756759643555, + "rewards/margins": 0.12417369335889816, + "rewards/rejected": -1.7347493171691895, + "step": 26 + }, + { + "epoch": 0.05708622398414272, + "grad_norm": 1.2933462858200073, + "learning_rate": 5.625e-07, + "logits/chosen": -0.4482795000076294, + "logits/rejected": -0.39409321546554565, + "logps/chosen": -0.8339261412620544, + "logps/rejected": -0.8675202131271362, + "loss": 1.3739, + "rewards/accuracies": 0.4921875, + "rewards/chosen": -1.6678522825241089, + "rewards/margins": 0.06718815863132477, + "rewards/rejected": -1.7350404262542725, + "step": 27 + }, + { + "epoch": 0.059200528576148, + "grad_norm": 0.5808025002479553, + "learning_rate": 5.833333333333334e-07, + "logits/chosen": -0.37116044759750366, + "logits/rejected": -0.3478051722049713, + "logps/chosen": -0.8950318694114685, + "logps/rejected": -0.9756672978401184, + "loss": 1.3505, + "rewards/accuracies": 0.5625, + "rewards/chosen": -1.790063738822937, + "rewards/margins": 0.1612708568572998, + "rewards/rejected": -1.9513345956802368, + "step": 28 + }, + { + "epoch": 0.06131483316815329, + "grad_norm": 1.0569533109664917, + "learning_rate": 6.041666666666666e-07, + "logits/chosen": -0.421148419380188, + "logits/rejected": -0.38443076610565186, + "logps/chosen": -0.8021283745765686, + "logps/rejected": -0.8370179533958435, + "loss": 1.3916, + "rewards/accuracies": 0.46875, + "rewards/chosen": -1.6042567491531372, + "rewards/margins": 0.06977920234203339, + "rewards/rejected": -1.674035906791687, + "step": 29 + }, + { + "epoch": 0.06342913776015857, + "grad_norm": 0.42577147483825684, + "learning_rate": 6.249999999999999e-07, + "logits/chosen": -0.4429818391799927, + "logits/rejected": -0.3524704575538635, + "logps/chosen": -0.8916822671890259, + "logps/rejected": -0.8985542058944702, + "loss": 1.4321, + "rewards/accuracies": 0.5234375, + "rewards/chosen": -1.7833645343780518, + "rewards/margins": 0.01374388113617897, + "rewards/rejected": -1.7971084117889404, + "step": 30 + }, + { + "epoch": 0.06554344235216386, + "grad_norm": 1.0056904554367065, + "learning_rate": 6.458333333333333e-07, + "logits/chosen": -0.376451700925827, + "logits/rejected": -0.342519074678421, + "logps/chosen": -0.9038617014884949, + "logps/rejected": -0.953092634677887, + "loss": 1.398, + "rewards/accuracies": 0.4765625, + "rewards/chosen": -1.8077234029769897, + "rewards/margins": 0.09846188127994537, + "rewards/rejected": -1.906185269355774, + "step": 31 + }, + { + "epoch": 0.06765774694416915, + "grad_norm": 0.5494012236595154, + "learning_rate": 6.666666666666666e-07, + "logits/chosen": -0.3459138870239258, + "logits/rejected": -0.3590989410877228, + "logps/chosen": -0.8274999260902405, + "logps/rejected": -0.8776509761810303, + "loss": 1.363, + "rewards/accuracies": 0.5625, + "rewards/chosen": -1.654999852180481, + "rewards/margins": 0.1003020703792572, + "rewards/rejected": -1.7553019523620605, + "step": 32 + }, + { + "epoch": 0.06977205153617443, + "grad_norm": 0.693267822265625, + "learning_rate": 6.875e-07, + "logits/chosen": -0.40053680539131165, + "logits/rejected": -0.37323904037475586, + "logps/chosen": -0.8255244493484497, + "logps/rejected": -0.8658804893493652, + "loss": 1.3712, + "rewards/accuracies": 0.578125, + "rewards/chosen": -1.6510488986968994, + "rewards/margins": 0.08071210980415344, + "rewards/rejected": -1.7317609786987305, + "step": 33 + }, + { + "epoch": 0.07188635612817972, + "grad_norm": 2.213238000869751, + "learning_rate": 7.083333333333334e-07, + "logits/chosen": -0.40097948908805847, + "logits/rejected": -0.38190510869026184, + "logps/chosen": -0.9122671484947205, + "logps/rejected": -0.9549552798271179, + "loss": 1.36, + "rewards/accuracies": 0.5390625, + "rewards/chosen": -1.824534296989441, + "rewards/margins": 0.0853763073682785, + "rewards/rejected": -1.9099105596542358, + "step": 34 + }, + { + "epoch": 0.074000660720185, + "grad_norm": 0.6859830021858215, + "learning_rate": 7.291666666666666e-07, + "logits/chosen": -0.42501094937324524, + "logits/rejected": -0.42549416422843933, + "logps/chosen": -1.0008373260498047, + "logps/rejected": -1.1157118082046509, + "loss": 1.3294, + "rewards/accuracies": 0.5546875, + "rewards/chosen": -2.0016746520996094, + "rewards/margins": 0.22974897921085358, + "rewards/rejected": -2.2314236164093018, + "step": 35 + }, + { + "epoch": 0.0761149653121903, + "grad_norm": 0.6468721628189087, + "learning_rate": 7.5e-07, + "logits/chosen": -0.36494994163513184, + "logits/rejected": -0.30433908104896545, + "logps/chosen": -0.9062094688415527, + "logps/rejected": -0.920263409614563, + "loss": 1.4312, + "rewards/accuracies": 0.4921875, + "rewards/chosen": -1.8124189376831055, + "rewards/margins": 0.02810765616595745, + "rewards/rejected": -1.840526819229126, + "step": 36 + }, + { + "epoch": 0.07822926990419557, + "grad_norm": 0.5085556507110596, + "learning_rate": 7.708333333333333e-07, + "logits/chosen": -0.4677881598472595, + "logits/rejected": -0.456132709980011, + "logps/chosen": -1.0101865530014038, + "logps/rejected": -1.0429682731628418, + "loss": 1.4132, + "rewards/accuracies": 0.5234375, + "rewards/chosen": -2.0203731060028076, + "rewards/margins": 0.06556359678506851, + "rewards/rejected": -2.0859365463256836, + "step": 37 + }, + { + "epoch": 0.08034357449620086, + "grad_norm": 0.23813335597515106, + "learning_rate": 7.916666666666666e-07, + "logits/chosen": -0.3991190791130066, + "logits/rejected": -0.3664044141769409, + "logps/chosen": -0.9578174352645874, + "logps/rejected": -0.9229263067245483, + "loss": 1.4824, + "rewards/accuracies": 0.4609375, + "rewards/chosen": -1.9156348705291748, + "rewards/margins": -0.06978224962949753, + "rewards/rejected": -1.8458526134490967, + "step": 38 + }, + { + "epoch": 0.08245787908820615, + "grad_norm": 0.587037980556488, + "learning_rate": 8.125e-07, + "logits/chosen": -0.37554049491882324, + "logits/rejected": -0.36305734515190125, + "logps/chosen": -0.8503091931343079, + "logps/rejected": -0.864615261554718, + "loss": 1.4086, + "rewards/accuracies": 0.4296875, + "rewards/chosen": -1.7006183862686157, + "rewards/margins": 0.028611989691853523, + "rewards/rejected": -1.729230523109436, + "step": 39 + }, + { + "epoch": 0.08457218368021142, + "grad_norm": 0.4172501862049103, + "learning_rate": 8.333333333333333e-07, + "logits/chosen": -0.4405443072319031, + "logits/rejected": -0.41723060607910156, + "logps/chosen": -0.8502858877182007, + "logps/rejected": -0.9114271402359009, + "loss": 1.3446, + "rewards/accuracies": 0.546875, + "rewards/chosen": -1.7005717754364014, + "rewards/margins": 0.12228240817785263, + "rewards/rejected": -1.8228542804718018, + "step": 40 + }, + { + "epoch": 0.08668648827221671, + "grad_norm": 0.9275372624397278, + "learning_rate": 8.541666666666666e-07, + "logits/chosen": -0.4200601577758789, + "logits/rejected": -0.3478623628616333, + "logps/chosen": -0.892408013343811, + "logps/rejected": -0.9276402592658997, + "loss": 1.3887, + "rewards/accuracies": 0.46875, + "rewards/chosen": -1.784816026687622, + "rewards/margins": 0.07046431303024292, + "rewards/rejected": -1.8552805185317993, + "step": 41 + }, + { + "epoch": 0.088800792864222, + "grad_norm": 0.7317383289337158, + "learning_rate": 8.75e-07, + "logits/chosen": -0.37675267457962036, + "logits/rejected": -0.33540332317352295, + "logps/chosen": -0.7866061925888062, + "logps/rejected": -0.824250340461731, + "loss": 1.3837, + "rewards/accuracies": 0.5078125, + "rewards/chosen": -1.5732123851776123, + "rewards/margins": 0.07528844475746155, + "rewards/rejected": -1.648500680923462, + "step": 42 + }, + { + "epoch": 0.09091509745622729, + "grad_norm": 0.9452736973762512, + "learning_rate": 8.958333333333334e-07, + "logits/chosen": -0.4662383198738098, + "logits/rejected": -0.4447881579399109, + "logps/chosen": -0.9490666389465332, + "logps/rejected": -1.0112388134002686, + "loss": 1.3412, + "rewards/accuracies": 0.5390625, + "rewards/chosen": -1.8981332778930664, + "rewards/margins": 0.12434446066617966, + "rewards/rejected": -2.022477626800537, + "step": 43 + }, + { + "epoch": 0.09302940204823257, + "grad_norm": 0.2848323881626129, + "learning_rate": 9.166666666666665e-07, + "logits/chosen": -0.41404005885124207, + "logits/rejected": -0.3944583535194397, + "logps/chosen": -0.8224930167198181, + "logps/rejected": -0.8416361808776855, + "loss": 1.4027, + "rewards/accuracies": 0.609375, + "rewards/chosen": -1.6449860334396362, + "rewards/margins": 0.038286346942186356, + "rewards/rejected": -1.683272361755371, + "step": 44 + }, + { + "epoch": 0.09514370664023786, + "grad_norm": 0.7165678143501282, + "learning_rate": 9.374999999999999e-07, + "logits/chosen": -0.40475326776504517, + "logits/rejected": -0.3559921383857727, + "logps/chosen": -0.8070214986801147, + "logps/rejected": -0.8993593454360962, + "loss": 1.3148, + "rewards/accuracies": 0.53125, + "rewards/chosen": -1.6140429973602295, + "rewards/margins": 0.18467575311660767, + "rewards/rejected": -1.7987186908721924, + "step": 45 + }, + { + "epoch": 0.09725801123224315, + "grad_norm": 0.4779021739959717, + "learning_rate": 9.583333333333334e-07, + "logits/chosen": -0.4171525835990906, + "logits/rejected": -0.42166149616241455, + "logps/chosen": -0.7872560024261475, + "logps/rejected": -0.8496187925338745, + "loss": 1.3356, + "rewards/accuracies": 0.5546875, + "rewards/chosen": -1.574512004852295, + "rewards/margins": 0.12472567707300186, + "rewards/rejected": -1.699237585067749, + "step": 46 + }, + { + "epoch": 0.09937231582424844, + "grad_norm": 0.7870219349861145, + "learning_rate": 9.791666666666667e-07, + "logits/chosen": -0.3734116554260254, + "logits/rejected": -0.32778748869895935, + "logps/chosen": -0.7842286825180054, + "logps/rejected": -0.8161548972129822, + "loss": 1.3647, + "rewards/accuracies": 0.5078125, + "rewards/chosen": -1.5684573650360107, + "rewards/margins": 0.06385258585214615, + "rewards/rejected": -1.6323097944259644, + "step": 47 + }, + { + "epoch": 0.10148662041625371, + "grad_norm": 0.2597256600856781, + "learning_rate": 1e-06, + "logits/chosen": -0.4355677664279938, + "logits/rejected": -0.38983187079429626, + "logps/chosen": -0.8787693977355957, + "logps/rejected": -0.9383041262626648, + "loss": 1.35, + "rewards/accuracies": 0.5546875, + "rewards/chosen": -1.7575387954711914, + "rewards/margins": 0.11906948685646057, + "rewards/rejected": -1.8766082525253296, + "step": 48 + }, + { + "epoch": 0.103600925008259, + "grad_norm": 0.9942799210548401, + "learning_rate": 9.999862751990697e-07, + "logits/chosen": -0.4244321882724762, + "logits/rejected": -0.4366786777973175, + "logps/chosen": -0.7910157442092896, + "logps/rejected": -0.8630884885787964, + "loss": 1.3166, + "rewards/accuracies": 0.5546875, + "rewards/chosen": -1.582031488418579, + "rewards/margins": 0.14414538443088531, + "rewards/rejected": -1.7261769771575928, + "step": 49 + }, + { + "epoch": 0.10571522960026429, + "grad_norm": 0.5333903431892395, + "learning_rate": 9.999451015497595e-07, + "logits/chosen": -0.389942467212677, + "logits/rejected": -0.36674585938453674, + "logps/chosen": -0.7312074899673462, + "logps/rejected": -0.7289648652076721, + "loss": 1.4225, + "rewards/accuracies": 0.4765625, + "rewards/chosen": -1.4624149799346924, + "rewards/margins": -0.004485193639993668, + "rewards/rejected": -1.4579297304153442, + "step": 50 + }, + { + "epoch": 0.10782953419226958, + "grad_norm": 0.5712242722511292, + "learning_rate": 9.9987648131247e-07, + "logits/chosen": -0.4622853994369507, + "logits/rejected": -0.3728552460670471, + "logps/chosen": -0.8764299750328064, + "logps/rejected": -0.869678795337677, + "loss": 1.4542, + "rewards/accuracies": 0.53125, + "rewards/chosen": -1.7528599500656128, + "rewards/margins": -0.013502337038516998, + "rewards/rejected": -1.739357590675354, + "step": 51 + }, + { + "epoch": 0.10994383878427486, + "grad_norm": 0.2586441934108734, + "learning_rate": 9.99780418254397e-07, + "logits/chosen": -0.37249019742012024, + "logits/rejected": -0.3998304605484009, + "logps/chosen": -0.8435611724853516, + "logps/rejected": -0.9359882473945618, + "loss": 1.3057, + "rewards/accuracies": 0.625, + "rewards/chosen": -1.6871223449707031, + "rewards/margins": 0.18485431373119354, + "rewards/rejected": -1.8719764947891235, + "step": 52 + }, + { + "epoch": 0.11205814337628014, + "grad_norm": 1.0829113721847534, + "learning_rate": 9.996569176493268e-07, + "logits/chosen": -0.47697725892066956, + "logits/rejected": -0.4208195209503174, + "logps/chosen": -0.8014968037605286, + "logps/rejected": -0.8703804612159729, + "loss": 1.3523, + "rewards/accuracies": 0.5703125, + "rewards/chosen": -1.6029936075210571, + "rewards/margins": 0.1377674788236618, + "rewards/rejected": -1.7407609224319458, + "step": 53 + }, + { + "epoch": 0.11417244796828543, + "grad_norm": 0.5523208379745483, + "learning_rate": 9.995059862773438e-07, + "logits/chosen": -0.40533363819122314, + "logits/rejected": -0.36801978945732117, + "logps/chosen": -0.7641825675964355, + "logps/rejected": -0.8168596029281616, + "loss": 1.3692, + "rewards/accuracies": 0.53125, + "rewards/chosen": -1.528365135192871, + "rewards/margins": 0.10535416752099991, + "rewards/rejected": -1.6337192058563232, + "step": 54 + }, + { + "epoch": 0.11628675256029072, + "grad_norm": 0.614101767539978, + "learning_rate": 9.993276324244605e-07, + "logits/chosen": -0.4476906955242157, + "logits/rejected": -0.40396648645401, + "logps/chosen": -0.8706808090209961, + "logps/rejected": -0.9221430420875549, + "loss": 1.3787, + "rewards/accuracies": 0.515625, + "rewards/chosen": -1.7413616180419922, + "rewards/margins": 0.10292442888021469, + "rewards/rejected": -1.8442860841751099, + "step": 55 + }, + { + "epoch": 0.118401057152296, + "grad_norm": 0.3428778052330017, + "learning_rate": 9.991218658821608e-07, + "logits/chosen": -0.31709593534469604, + "logits/rejected": -0.2760937213897705, + "logps/chosen": -0.842248797416687, + "logps/rejected": -0.8068034648895264, + "loss": 1.498, + "rewards/accuracies": 0.4375, + "rewards/chosen": -1.684497594833374, + "rewards/margins": -0.07089066505432129, + "rewards/rejected": -1.6136069297790527, + "step": 56 + }, + { + "epoch": 0.12051536174430129, + "grad_norm": 0.6877723932266235, + "learning_rate": 9.988886979468643e-07, + "logits/chosen": -0.41800016164779663, + "logits/rejected": -0.4011584222316742, + "logps/chosen": -0.7845420837402344, + "logps/rejected": -0.834447979927063, + "loss": 1.3491, + "rewards/accuracies": 0.546875, + "rewards/chosen": -1.5690841674804688, + "rewards/margins": 0.09981165081262589, + "rewards/rejected": -1.668895959854126, + "step": 57 + }, + { + "epoch": 0.12262966633630658, + "grad_norm": 0.9649701714515686, + "learning_rate": 9.98628141419305e-07, + "logits/chosen": -0.4253537058830261, + "logits/rejected": -0.4305458962917328, + "logps/chosen": -0.86476731300354, + "logps/rejected": -0.9080386161804199, + "loss": 1.3639, + "rewards/accuracies": 0.515625, + "rewards/chosen": -1.72953462600708, + "rewards/margins": 0.08654248714447021, + "rewards/rejected": -1.8160772323608398, + "step": 58 + }, + { + "epoch": 0.12474397092831185, + "grad_norm": 1.3779780864715576, + "learning_rate": 9.98340210603829e-07, + "logits/chosen": -0.39970022439956665, + "logits/rejected": -0.441428005695343, + "logps/chosen": -0.8662775158882141, + "logps/rejected": -0.9646260738372803, + "loss": 1.3001, + "rewards/accuracies": 0.6484375, + "rewards/chosen": -1.7325550317764282, + "rewards/margins": 0.19669723510742188, + "rewards/rejected": -1.9292521476745605, + "step": 59 + }, + { + "epoch": 0.12685827552031714, + "grad_norm": 0.5366966724395752, + "learning_rate": 9.980249213076084e-07, + "logits/chosen": -0.37770116329193115, + "logits/rejected": -0.35231757164001465, + "logps/chosen": -0.8165755867958069, + "logps/rejected": -0.8619179129600525, + "loss": 1.3699, + "rewards/accuracies": 0.578125, + "rewards/chosen": -1.6331511735916138, + "rewards/margins": 0.09068439900875092, + "rewards/rejected": -1.723835825920105, + "step": 60 + }, + { + "epoch": 0.12897258011232243, + "grad_norm": 0.36810922622680664, + "learning_rate": 9.976822908397748e-07, + "logits/chosen": -0.4224976897239685, + "logits/rejected": -0.41758257150650024, + "logps/chosen": -0.8445641994476318, + "logps/rejected": -0.9393664598464966, + "loss": 1.3193, + "rewards/accuracies": 0.5390625, + "rewards/chosen": -1.6891283988952637, + "rewards/margins": 0.18960458040237427, + "rewards/rejected": -1.8787329196929932, + "step": 61 + }, + { + "epoch": 0.13108688470432772, + "grad_norm": 0.6838279366493225, + "learning_rate": 9.97312338010468e-07, + "logits/chosen": -0.4168627858161926, + "logits/rejected": -0.36115381121635437, + "logps/chosen": -0.8370552659034729, + "logps/rejected": -0.8352169394493103, + "loss": 1.4284, + "rewards/accuracies": 0.5703125, + "rewards/chosen": -1.6741105318069458, + "rewards/margins": -0.0036766715347766876, + "rewards/rejected": -1.6704338788986206, + "step": 62 + }, + { + "epoch": 0.133201189296333, + "grad_norm": 0.39330533146858215, + "learning_rate": 9.969150831298037e-07, + "logits/chosen": -0.4558233618736267, + "logits/rejected": -0.4025765061378479, + "logps/chosen": -0.826255738735199, + "logps/rejected": -0.894213080406189, + "loss": 1.3485, + "rewards/accuracies": 0.5, + "rewards/chosen": -1.652511477470398, + "rewards/margins": 0.13591471314430237, + "rewards/rejected": -1.788426160812378, + "step": 63 + }, + { + "epoch": 0.1353154938883383, + "grad_norm": 0.6055929660797119, + "learning_rate": 9.964905480067584e-07, + "logits/chosen": -0.459463506937027, + "logits/rejected": -0.42943331599235535, + "logps/chosen": -0.7901928424835205, + "logps/rejected": -0.7964221239089966, + "loss": 1.4057, + "rewards/accuracies": 0.484375, + "rewards/chosen": -1.580385684967041, + "rewards/margins": 0.012458762153983116, + "rewards/rejected": -1.5928442478179932, + "step": 64 + } + ], + "logging_steps": 1, + "max_steps": 472, + "num_input_tokens_seen": 0, + "num_train_epochs": 1, + "save_steps": 64, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 0.0, + "train_batch_size": 2, + "trial_name": null, + "trial_params": null +} diff --git a/config.json b/config.json new file mode 100644 index 0000000000000000000000000000000000000000..872ac37992951054b5f3223962fa14261b9ddbb9 --- /dev/null +++ b/config.json @@ -0,0 +1,40 @@ +{ + "_name_or_path": "/home/v-liuzichuan/huggingface/Llama-3.2-3B-Instruct", + "architectures": [ + "LlamaForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "bos_token_id": 128000, + "eos_token_id": [ + 128001, + 128008, + 128009 + ], + "head_dim": 128, + "hidden_act": "silu", + "hidden_size": 3072, + "initializer_range": 0.02, + "intermediate_size": 8192, + "max_position_embeddings": 131072, + "mlp_bias": false, + "model_type": "llama", + "num_attention_heads": 24, + "num_hidden_layers": 28, + "num_key_value_heads": 8, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": { + "factor": 32.0, + "high_freq_factor": 4.0, + "low_freq_factor": 1.0, + "original_max_position_embeddings": 8192, + "rope_type": "llama3" + }, + "rope_theta": 500000.0, + "tie_word_embeddings": true, + "torch_dtype": "bfloat16", + "transformers_version": "4.45.2", + "use_cache": true, + "vocab_size": 128256 +} diff --git a/eval_results.json b/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..a60afe8b3f7e5350e90ea93bf7fd77f71e6e1e81 --- /dev/null +++ b/eval_results.json @@ -0,0 +1,16 @@ +{ + "epoch": 0.9979517674264948, + "eval_logits/chosen": -0.6275817155838013, + "eval_logits/rejected": -0.6124553680419922, + "eval_logps/chosen": -1.7196346521377563, + "eval_logps/rejected": -2.078376293182373, + "eval_loss": 1.193624496459961, + "eval_rewards/accuracies": 0.6531440019607544, + "eval_rewards/chosen": -3.4392693042755127, + "eval_rewards/margins": 0.7174834609031677, + "eval_rewards/rejected": -4.156752586364746, + "eval_runtime": 401.2915, + "eval_samples": 1972, + "eval_samples_per_second": 4.914, + "eval_steps_per_second": 1.229 +} \ No newline at end of file diff --git a/generation_config.json b/generation_config.json new file mode 100644 index 0000000000000000000000000000000000000000..b18d16b4cc3c799dd78e854fd824ddf8a2996285 --- /dev/null +++ b/generation_config.json @@ -0,0 +1,12 @@ +{ + "bos_token_id": 128000, + "do_sample": true, + "eos_token_id": [ + 128001, + 128008, + 128009 + ], + "temperature": 0.6, + "top_p": 0.9, + "transformers_version": "4.45.2" +} diff --git a/model-00001-of-00002.safetensors b/model-00001-of-00002.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..338240a8dd22aab493666ae76b5bacad45a258c4 --- /dev/null +++ b/model-00001-of-00002.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b545023ec84ebafbf8707baa5dd99d4a8fc503e81f47167d70732510d2ca7922 +size 4965799096 diff --git a/model-00002-of-00002.safetensors b/model-00002-of-00002.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..33b50cf297e3a969e9a172bcf6adccc38753d31e --- /dev/null +++ b/model-00002-of-00002.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:05ed5a1ee186eaa4b7717096d6008f744a478b154c9e969eaaddd323150b36c5 +size 2247734992 diff --git a/model.safetensors.index.json b/model.safetensors.index.json new file mode 100644 index 0000000000000000000000000000000000000000..ed64de846d720b9a7859dc20575fea8e8ca51940 --- /dev/null +++ b/model.safetensors.index.json @@ -0,0 +1,262 @@ +{ + "metadata": { + "total_size": 7213504512 + }, + "weight_map": { + "lm_head.weight": "model-00002-of-00002.safetensors", + "model.embed_tokens.weight": "model-00001-of-00002.safetensors", + "model.layers.0.input_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.0.mlp.down_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.0.mlp.gate_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.0.mlp.up_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.0.post_attention_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.0.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.0.self_attn.o_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.0.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.0.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.1.input_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.1.mlp.down_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.1.mlp.gate_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.1.mlp.up_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.1.post_attention_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.1.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.1.self_attn.o_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.1.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.1.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.10.input_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.10.mlp.down_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.10.mlp.gate_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.10.mlp.up_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.10.post_attention_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.10.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.10.self_attn.o_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.10.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.10.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.11.input_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.11.mlp.down_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.11.mlp.gate_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.11.mlp.up_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.11.post_attention_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.11.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.11.self_attn.o_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.11.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.11.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.12.input_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.12.mlp.down_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.12.mlp.gate_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.12.mlp.up_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.12.post_attention_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.12.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.12.self_attn.o_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.12.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.12.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.13.input_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.13.mlp.down_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.13.mlp.gate_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.13.mlp.up_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.13.post_attention_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.13.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.13.self_attn.o_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.13.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.13.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.14.input_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.14.mlp.down_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.14.mlp.gate_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.14.mlp.up_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.14.post_attention_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.14.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.14.self_attn.o_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.14.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.14.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.15.input_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.15.mlp.down_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.15.mlp.gate_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.15.mlp.up_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.15.post_attention_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.15.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.15.self_attn.o_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.15.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.15.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.16.input_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.16.mlp.down_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.16.mlp.gate_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.16.mlp.up_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.16.post_attention_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.16.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.16.self_attn.o_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.16.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.16.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.17.input_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.17.mlp.down_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.17.mlp.gate_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.17.mlp.up_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.17.post_attention_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.17.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.17.self_attn.o_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.17.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.17.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.18.input_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.18.mlp.down_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.18.mlp.gate_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.18.mlp.up_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.18.post_attention_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.18.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.18.self_attn.o_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.18.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.18.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.19.input_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.19.mlp.down_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.19.mlp.gate_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.19.mlp.up_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.19.post_attention_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.19.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.19.self_attn.o_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.19.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.19.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.2.input_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.2.mlp.down_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.2.mlp.gate_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.2.mlp.up_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.2.post_attention_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.2.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.2.self_attn.o_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.2.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.2.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.20.input_layernorm.weight": "model-00002-of-00002.safetensors", + "model.layers.20.mlp.down_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.20.mlp.gate_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.20.mlp.up_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.20.post_attention_layernorm.weight": "model-00002-of-00002.safetensors", + "model.layers.20.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.20.self_attn.o_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.20.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.20.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.21.input_layernorm.weight": "model-00002-of-00002.safetensors", + "model.layers.21.mlp.down_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.21.mlp.gate_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.21.mlp.up_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.21.post_attention_layernorm.weight": "model-00002-of-00002.safetensors", + "model.layers.21.self_attn.k_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.21.self_attn.o_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.21.self_attn.q_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.21.self_attn.v_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.22.input_layernorm.weight": "model-00002-of-00002.safetensors", + "model.layers.22.mlp.down_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.22.mlp.gate_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.22.mlp.up_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.22.post_attention_layernorm.weight": "model-00002-of-00002.safetensors", + "model.layers.22.self_attn.k_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.22.self_attn.o_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.22.self_attn.q_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.22.self_attn.v_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.23.input_layernorm.weight": "model-00002-of-00002.safetensors", + "model.layers.23.mlp.down_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.23.mlp.gate_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.23.mlp.up_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.23.post_attention_layernorm.weight": "model-00002-of-00002.safetensors", + "model.layers.23.self_attn.k_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.23.self_attn.o_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.23.self_attn.q_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.23.self_attn.v_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.24.input_layernorm.weight": "model-00002-of-00002.safetensors", + "model.layers.24.mlp.down_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.24.mlp.gate_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.24.mlp.up_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.24.post_attention_layernorm.weight": "model-00002-of-00002.safetensors", + "model.layers.24.self_attn.k_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.24.self_attn.o_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.24.self_attn.q_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.24.self_attn.v_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.25.input_layernorm.weight": "model-00002-of-00002.safetensors", + "model.layers.25.mlp.down_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.25.mlp.gate_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.25.mlp.up_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.25.post_attention_layernorm.weight": "model-00002-of-00002.safetensors", + "model.layers.25.self_attn.k_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.25.self_attn.o_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.25.self_attn.q_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.25.self_attn.v_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.26.input_layernorm.weight": "model-00002-of-00002.safetensors", + "model.layers.26.mlp.down_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.26.mlp.gate_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.26.mlp.up_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.26.post_attention_layernorm.weight": "model-00002-of-00002.safetensors", + "model.layers.26.self_attn.k_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.26.self_attn.o_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.26.self_attn.q_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.26.self_attn.v_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.27.input_layernorm.weight": "model-00002-of-00002.safetensors", + "model.layers.27.mlp.down_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.27.mlp.gate_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.27.mlp.up_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.27.post_attention_layernorm.weight": "model-00002-of-00002.safetensors", + "model.layers.27.self_attn.k_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.27.self_attn.o_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.27.self_attn.q_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.27.self_attn.v_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.3.input_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.3.mlp.down_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.3.mlp.gate_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.3.mlp.up_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.3.post_attention_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.3.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.3.self_attn.o_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.3.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.3.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.4.input_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.4.mlp.down_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.4.mlp.gate_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.4.mlp.up_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.4.post_attention_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.4.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.4.self_attn.o_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.4.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.4.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.5.input_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.5.mlp.down_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.5.mlp.gate_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.5.mlp.up_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.5.post_attention_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.5.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.5.self_attn.o_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.5.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.5.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.6.input_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.6.mlp.down_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.6.mlp.gate_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.6.mlp.up_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.6.post_attention_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.6.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.6.self_attn.o_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.6.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.6.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.7.input_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.7.mlp.down_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.7.mlp.gate_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.7.mlp.up_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.7.post_attention_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.7.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.7.self_attn.o_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.7.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.7.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.8.input_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.8.mlp.down_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.8.mlp.gate_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.8.mlp.up_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.8.post_attention_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.8.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.8.self_attn.o_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.8.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.8.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.9.input_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.9.mlp.down_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.9.mlp.gate_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.9.mlp.up_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.9.post_attention_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.9.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.9.self_attn.o_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.9.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.9.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.norm.weight": "model-00002-of-00002.safetensors" + } +} diff --git a/special_tokens_map.json b/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..b43be96621d147110fb8a18b5776ec6e38516127 --- /dev/null +++ b/special_tokens_map.json @@ -0,0 +1,17 @@ +{ + "bos_token": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": "<|eot_id|>" +} diff --git a/tokenizer.json b/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..1c1d8d5c9024994f1d3b00f9662b8dd89ca13cf2 --- /dev/null +++ b/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6b9e4e7fb171f92fd137b777cc2714bf87d11576700a1dcd7a399e7bbe39537b +size 17209920 diff --git a/tokenizer_config.json b/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..e3cdb7bfcc59fd293867d98e124a30b3ddf39c93 --- /dev/null +++ b/tokenizer_config.json @@ -0,0 +1,2063 @@ +{ + "added_tokens_decoder": { + "128000": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128001": { + "content": "<|end_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128002": { + "content": "<|reserved_special_token_0|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128003": { + "content": "<|reserved_special_token_1|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128004": { + "content": "<|finetune_right_pad_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128005": { + "content": "<|reserved_special_token_2|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128006": { + "content": "<|start_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128007": { + "content": "<|end_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128008": { + "content": "<|eom_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128009": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128010": { + "content": "<|python_tag|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128011": { + "content": "<|reserved_special_token_3|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128012": { + "content": "<|reserved_special_token_4|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128013": { + "content": "<|reserved_special_token_5|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128014": { + "content": "<|reserved_special_token_6|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128015": { + "content": "<|reserved_special_token_7|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128016": { + "content": "<|reserved_special_token_8|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128017": { + "content": "<|reserved_special_token_9|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128018": { + "content": "<|reserved_special_token_10|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128019": { + "content": "<|reserved_special_token_11|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128020": { + "content": "<|reserved_special_token_12|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128021": { + "content": "<|reserved_special_token_13|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128022": { + "content": "<|reserved_special_token_14|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128023": { + "content": "<|reserved_special_token_15|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128024": { + "content": "<|reserved_special_token_16|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128025": { + "content": "<|reserved_special_token_17|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128026": { + "content": "<|reserved_special_token_18|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128027": { + "content": "<|reserved_special_token_19|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128028": { + "content": "<|reserved_special_token_20|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128029": { + "content": "<|reserved_special_token_21|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128030": { + "content": "<|reserved_special_token_22|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128031": { + "content": "<|reserved_special_token_23|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128032": { + "content": "<|reserved_special_token_24|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128033": { + "content": "<|reserved_special_token_25|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128034": { + "content": "<|reserved_special_token_26|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128035": { + "content": "<|reserved_special_token_27|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128036": { + "content": "<|reserved_special_token_28|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128037": { + "content": "<|reserved_special_token_29|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128038": { + "content": "<|reserved_special_token_30|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128039": { + "content": "<|reserved_special_token_31|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128040": { + "content": "<|reserved_special_token_32|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128041": { + "content": "<|reserved_special_token_33|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128042": { + "content": "<|reserved_special_token_34|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128043": { + "content": "<|reserved_special_token_35|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128044": { + "content": "<|reserved_special_token_36|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128045": { + "content": "<|reserved_special_token_37|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128046": { + "content": "<|reserved_special_token_38|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128047": { + "content": "<|reserved_special_token_39|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128048": { + "content": "<|reserved_special_token_40|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128049": { + "content": "<|reserved_special_token_41|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128050": { + "content": "<|reserved_special_token_42|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128051": { + "content": "<|reserved_special_token_43|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128052": { + "content": "<|reserved_special_token_44|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128053": { + "content": "<|reserved_special_token_45|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128054": { + "content": "<|reserved_special_token_46|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128055": { + "content": "<|reserved_special_token_47|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128056": { + "content": "<|reserved_special_token_48|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128057": { + "content": "<|reserved_special_token_49|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128058": { + "content": "<|reserved_special_token_50|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128059": { + "content": "<|reserved_special_token_51|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128060": { + "content": "<|reserved_special_token_52|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128061": { + "content": "<|reserved_special_token_53|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128062": { + "content": "<|reserved_special_token_54|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128063": { + "content": "<|reserved_special_token_55|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128064": { + "content": "<|reserved_special_token_56|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128065": { + "content": "<|reserved_special_token_57|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128066": { + "content": "<|reserved_special_token_58|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128067": { + "content": "<|reserved_special_token_59|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128068": { + "content": "<|reserved_special_token_60|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128069": { + "content": "<|reserved_special_token_61|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128070": { + "content": "<|reserved_special_token_62|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128071": { + "content": "<|reserved_special_token_63|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128072": { + "content": "<|reserved_special_token_64|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128073": { + "content": "<|reserved_special_token_65|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128074": { + "content": "<|reserved_special_token_66|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128075": { + "content": "<|reserved_special_token_67|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128076": { + "content": "<|reserved_special_token_68|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128077": { + "content": "<|reserved_special_token_69|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128078": { + "content": "<|reserved_special_token_70|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128079": { + "content": "<|reserved_special_token_71|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128080": { + "content": "<|reserved_special_token_72|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128081": { + "content": "<|reserved_special_token_73|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128082": { + "content": "<|reserved_special_token_74|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128083": { + "content": "<|reserved_special_token_75|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128084": { + "content": "<|reserved_special_token_76|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128085": { + "content": "<|reserved_special_token_77|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128086": { + "content": "<|reserved_special_token_78|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128087": { + "content": "<|reserved_special_token_79|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128088": { + "content": "<|reserved_special_token_80|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128089": { + "content": "<|reserved_special_token_81|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128090": { + "content": "<|reserved_special_token_82|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128091": { + "content": "<|reserved_special_token_83|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128092": { + "content": "<|reserved_special_token_84|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128093": { + "content": "<|reserved_special_token_85|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128094": { + "content": "<|reserved_special_token_86|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128095": { + "content": "<|reserved_special_token_87|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128096": { + "content": "<|reserved_special_token_88|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128097": { + "content": "<|reserved_special_token_89|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128098": { + "content": "<|reserved_special_token_90|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128099": { + "content": "<|reserved_special_token_91|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128100": { + "content": "<|reserved_special_token_92|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128101": { + "content": "<|reserved_special_token_93|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128102": { + "content": "<|reserved_special_token_94|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128103": { + "content": "<|reserved_special_token_95|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128104": { + "content": "<|reserved_special_token_96|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128105": { + "content": "<|reserved_special_token_97|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128106": { + "content": "<|reserved_special_token_98|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128107": { + "content": "<|reserved_special_token_99|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128108": { + "content": "<|reserved_special_token_100|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128109": { + "content": "<|reserved_special_token_101|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128110": { + "content": "<|reserved_special_token_102|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128111": { + "content": "<|reserved_special_token_103|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128112": { + "content": "<|reserved_special_token_104|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128113": { + "content": "<|reserved_special_token_105|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128114": { + "content": "<|reserved_special_token_106|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128115": { + "content": "<|reserved_special_token_107|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128116": { + "content": "<|reserved_special_token_108|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128117": { + "content": "<|reserved_special_token_109|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128118": { + "content": "<|reserved_special_token_110|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128119": { + "content": "<|reserved_special_token_111|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128120": { + "content": "<|reserved_special_token_112|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128121": { + "content": "<|reserved_special_token_113|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128122": { + "content": "<|reserved_special_token_114|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128123": { + "content": "<|reserved_special_token_115|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128124": { + "content": "<|reserved_special_token_116|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128125": { + "content": "<|reserved_special_token_117|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128126": { + "content": "<|reserved_special_token_118|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128127": { + "content": "<|reserved_special_token_119|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128128": { + "content": "<|reserved_special_token_120|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128129": { + "content": "<|reserved_special_token_121|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128130": { + "content": "<|reserved_special_token_122|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128131": { + "content": "<|reserved_special_token_123|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128132": { + "content": "<|reserved_special_token_124|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128133": { + "content": "<|reserved_special_token_125|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128134": { + "content": "<|reserved_special_token_126|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128135": { + "content": "<|reserved_special_token_127|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128136": { + "content": "<|reserved_special_token_128|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128137": { + "content": "<|reserved_special_token_129|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128138": { + "content": "<|reserved_special_token_130|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128139": { + "content": "<|reserved_special_token_131|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128140": { + "content": "<|reserved_special_token_132|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128141": { + "content": "<|reserved_special_token_133|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128142": { + "content": "<|reserved_special_token_134|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128143": { + "content": "<|reserved_special_token_135|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128144": { + "content": "<|reserved_special_token_136|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128145": { + "content": "<|reserved_special_token_137|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128146": { + "content": "<|reserved_special_token_138|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128147": { + "content": "<|reserved_special_token_139|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128148": { + "content": "<|reserved_special_token_140|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128149": { + "content": "<|reserved_special_token_141|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128150": { + "content": "<|reserved_special_token_142|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128151": { + "content": "<|reserved_special_token_143|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128152": { + "content": "<|reserved_special_token_144|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128153": { + "content": "<|reserved_special_token_145|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128154": { + "content": "<|reserved_special_token_146|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128155": { + "content": "<|reserved_special_token_147|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128156": { + "content": "<|reserved_special_token_148|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128157": { + "content": "<|reserved_special_token_149|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128158": { + "content": "<|reserved_special_token_150|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128159": { + "content": "<|reserved_special_token_151|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128160": { + "content": "<|reserved_special_token_152|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128161": { + "content": "<|reserved_special_token_153|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128162": { + "content": "<|reserved_special_token_154|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128163": { + "content": "<|reserved_special_token_155|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128164": { + "content": "<|reserved_special_token_156|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128165": { + "content": "<|reserved_special_token_157|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128166": { + "content": "<|reserved_special_token_158|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128167": { + "content": "<|reserved_special_token_159|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128168": { + "content": "<|reserved_special_token_160|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128169": { + "content": "<|reserved_special_token_161|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128170": { + "content": "<|reserved_special_token_162|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128171": { + "content": "<|reserved_special_token_163|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128172": { + "content": "<|reserved_special_token_164|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128173": { + "content": "<|reserved_special_token_165|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128174": { + "content": "<|reserved_special_token_166|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128175": { + "content": "<|reserved_special_token_167|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128176": { + "content": "<|reserved_special_token_168|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128177": { + "content": "<|reserved_special_token_169|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128178": { + "content": "<|reserved_special_token_170|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128179": { + "content": "<|reserved_special_token_171|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128180": { + "content": "<|reserved_special_token_172|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128181": { + "content": "<|reserved_special_token_173|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128182": { + "content": "<|reserved_special_token_174|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128183": { + "content": "<|reserved_special_token_175|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128184": { + "content": "<|reserved_special_token_176|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128185": { + "content": "<|reserved_special_token_177|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128186": { + "content": "<|reserved_special_token_178|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128187": { + "content": "<|reserved_special_token_179|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128188": { + "content": "<|reserved_special_token_180|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128189": { + "content": "<|reserved_special_token_181|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128190": { + "content": "<|reserved_special_token_182|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128191": { + "content": "<|reserved_special_token_183|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128192": { + "content": "<|reserved_special_token_184|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128193": { + "content": "<|reserved_special_token_185|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128194": { + "content": "<|reserved_special_token_186|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128195": { + "content": "<|reserved_special_token_187|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128196": { + "content": "<|reserved_special_token_188|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128197": { + "content": "<|reserved_special_token_189|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128198": { + "content": "<|reserved_special_token_190|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128199": { + "content": "<|reserved_special_token_191|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128200": { + "content": "<|reserved_special_token_192|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128201": { + "content": "<|reserved_special_token_193|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128202": { + "content": "<|reserved_special_token_194|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128203": { + "content": "<|reserved_special_token_195|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128204": { + "content": "<|reserved_special_token_196|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128205": { + "content": "<|reserved_special_token_197|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128206": { + "content": "<|reserved_special_token_198|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128207": { + "content": "<|reserved_special_token_199|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128208": { + "content": "<|reserved_special_token_200|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128209": { + "content": "<|reserved_special_token_201|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128210": { + "content": "<|reserved_special_token_202|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128211": { + "content": "<|reserved_special_token_203|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128212": { + "content": "<|reserved_special_token_204|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128213": { + "content": "<|reserved_special_token_205|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128214": { + "content": "<|reserved_special_token_206|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128215": { + "content": "<|reserved_special_token_207|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128216": { + "content": "<|reserved_special_token_208|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128217": { + "content": "<|reserved_special_token_209|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128218": { + "content": "<|reserved_special_token_210|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128219": { + "content": "<|reserved_special_token_211|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128220": { + "content": "<|reserved_special_token_212|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128221": { + "content": "<|reserved_special_token_213|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128222": { + "content": "<|reserved_special_token_214|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128223": { + "content": "<|reserved_special_token_215|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128224": { + "content": "<|reserved_special_token_216|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128225": { + "content": "<|reserved_special_token_217|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128226": { + "content": "<|reserved_special_token_218|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128227": { + "content": "<|reserved_special_token_219|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128228": { + "content": "<|reserved_special_token_220|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128229": { + "content": "<|reserved_special_token_221|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128230": { + "content": "<|reserved_special_token_222|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128231": { + "content": "<|reserved_special_token_223|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128232": { + "content": "<|reserved_special_token_224|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128233": { + "content": "<|reserved_special_token_225|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128234": { + "content": "<|reserved_special_token_226|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128235": { + "content": "<|reserved_special_token_227|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128236": { + "content": "<|reserved_special_token_228|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128237": { + "content": "<|reserved_special_token_229|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128238": { + "content": "<|reserved_special_token_230|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128239": { + "content": "<|reserved_special_token_231|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128240": { + "content": "<|reserved_special_token_232|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128241": { + "content": "<|reserved_special_token_233|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128242": { + "content": "<|reserved_special_token_234|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128243": { + "content": "<|reserved_special_token_235|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128244": { + "content": "<|reserved_special_token_236|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128245": { + "content": "<|reserved_special_token_237|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128246": { + "content": "<|reserved_special_token_238|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128247": { + "content": "<|reserved_special_token_239|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128248": { + "content": "<|reserved_special_token_240|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128249": { + "content": "<|reserved_special_token_241|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128250": { + "content": "<|reserved_special_token_242|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128251": { + "content": "<|reserved_special_token_243|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128252": { + "content": "<|reserved_special_token_244|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128253": { + "content": "<|reserved_special_token_245|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128254": { + "content": "<|reserved_special_token_246|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128255": { + "content": "<|reserved_special_token_247|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + } + }, + "bos_token": "<|begin_of_text|>", + "chat_template": "{{- bos_token }}\n{%- if custom_tools is defined %}\n {%- set tools = custom_tools %}\n{%- endif %}\n{%- if not tools_in_user_message is defined %}\n {%- set tools_in_user_message = true %}\n{%- endif %}\n{%- if not date_string is defined %}\n {%- if strftime_now is defined %}\n {%- set date_string = strftime_now(\"%d %b %Y\") %}\n {%- else %}\n {%- set date_string = \"26 Jul 2024\" %}\n {%- endif %}\n{%- endif %}\n{%- if not tools is defined %}\n {%- set tools = none %}\n{%- endif %}\n\n{#- This block extracts the system message, so we can slot it into the right place. #}\n{%- if messages[0]['role'] == 'system' %}\n {%- set system_message = messages[0]['content']|trim %}\n {%- set messages = messages[1:] %}\n{%- else %}\n {%- set system_message = \"\" %}\n{%- endif %}\n\n{#- System message #}\n{{- \"<|start_header_id|>system<|end_header_id|>\\n\\n\" }}\n{%- if tools is not none %}\n {{- \"Environment: ipython\\n\" }}\n{%- endif %}\n{{- \"Cutting Knowledge Date: December 2023\\n\" }}\n{{- \"Today Date: \" + date_string + \"\\n\\n\" }}\n{%- if tools is not none and not tools_in_user_message %}\n {{- \"You have access to the following functions. To call a function, please respond with JSON for a function call.\" }}\n {{- 'Respond in the format {\"name\": function name, \"parameters\": dictionary of argument name and its value}.' }}\n {{- \"Do not use variables.\\n\\n\" }}\n {%- for t in tools %}\n {{- t | tojson(indent=4) }}\n {{- \"\\n\\n\" }}\n {%- endfor %}\n{%- endif %}\n{{- system_message }}\n{{- \"<|eot_id|>\" }}\n\n{#- Custom tools are passed in a user message with some extra guidance #}\n{%- if tools_in_user_message and not tools is none %}\n {#- Extract the first user message so we can plug it in here #}\n {%- if messages | length != 0 %}\n {%- set first_user_message = messages[0]['content']|trim %}\n {%- set messages = messages[1:] %}\n {%- else %}\n {{- raise_exception(\"Cannot put tools in the first user message when there's no first user message!\") }}\n{%- endif %}\n {{- '<|start_header_id|>user<|end_header_id|>\\n\\n' -}}\n {{- \"Given the following functions, please respond with a JSON for a function call \" }}\n {{- \"with its proper arguments that best answers the given prompt.\\n\\n\" }}\n {{- 'Respond in the format {\"name\": function name, \"parameters\": dictionary of argument name and its value}.' }}\n {{- \"Do not use variables.\\n\\n\" }}\n {%- for t in tools %}\n {{- t | tojson(indent=4) }}\n {{- \"\\n\\n\" }}\n {%- endfor %}\n {{- first_user_message + \"<|eot_id|>\"}}\n{%- endif %}\n\n{%- for message in messages %}\n {%- if not (message.role == 'ipython' or message.role == 'tool' or 'tool_calls' in message) %}\n {{- '<|start_header_id|>' + message['role'] + '<|end_header_id|>\\n\\n'+ message['content'] | trim + '<|eot_id|>' }}\n {%- elif 'tool_calls' in message %}\n {%- if not message.tool_calls|length == 1 %}\n {{- raise_exception(\"This model only supports single tool-calls at once!\") }}\n {%- endif %}\n {%- set tool_call = message.tool_calls[0].function %}\n {{- '<|start_header_id|>assistant<|end_header_id|>\\n\\n' -}}\n {{- '{\"name\": \"' + tool_call.name + '\", ' }}\n {{- '\"parameters\": ' }}\n {{- tool_call.arguments | tojson }}\n {{- \"}\" }}\n {{- \"<|eot_id|>\" }}\n {%- elif message.role == \"tool\" or message.role == \"ipython\" %}\n {{- \"<|start_header_id|>ipython<|end_header_id|>\\n\\n\" }}\n {%- if message.content is mapping or message.content is iterable %}\n {{- message.content | tojson }}\n {%- else %}\n {{- message.content }}\n {%- endif %}\n {{- \"<|eot_id|>\" }}\n {%- endif %}\n{%- endfor %}\n{%- if add_generation_prompt %}\n {{- '<|start_header_id|>assistant<|end_header_id|>\\n\\n' }}\n{%- endif %}\n", + "clean_up_tokenization_spaces": true, + "eos_token": "<|eot_id|>", + "model_input_names": [ + "input_ids", + "attention_mask" + ], + "model_max_length": 2048, + "pad_token": "<|eot_id|>", + "tokenizer_class": "PreTrainedTokenizerFast" +} diff --git a/train_results.json b/train_results.json new file mode 100644 index 0000000000000000000000000000000000000000..a5790c090c5cb2af1dc6722405d5714a617bd3e3 --- /dev/null +++ b/train_results.json @@ -0,0 +1,9 @@ +{ + "epoch": 0.9979517674264948, + "total_flos": 0.0, + "train_loss": 1.280224425307775, + "train_runtime": 38087.5267, + "train_samples": 60539, + "train_samples_per_second": 1.589, + "train_steps_per_second": 0.012 +} \ No newline at end of file diff --git a/trainer_state.json b/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..589430453f6e67759dd499dfacfb86046205ffab --- /dev/null +++ b/trainer_state.json @@ -0,0 +1,7122 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 0.9979517674264948, + "eval_steps": 500, + "global_step": 472, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.002114304592005286, + "grad_norm": 1.5018059015274048, + "learning_rate": 2.083333333333333e-08, + "logits/chosen": -0.3466828167438507, + "logits/rejected": -0.30099987983703613, + "logps/chosen": -0.9345186948776245, + "logps/rejected": -0.9117153882980347, + "loss": 1.4889, + "rewards/accuracies": 0.53125, + "rewards/chosen": -1.869037389755249, + "rewards/margins": -0.04560665041208267, + "rewards/rejected": -1.8234307765960693, + "step": 1 + }, + { + "epoch": 0.004228609184010572, + "grad_norm": 0.8093975186347961, + "learning_rate": 4.166666666666666e-08, + "logits/chosen": -0.4310421049594879, + "logits/rejected": -0.39132067561149597, + "logps/chosen": -0.8198825716972351, + "logps/rejected": -0.8644211888313293, + "loss": 1.376, + "rewards/accuracies": 0.515625, + "rewards/chosen": -1.6397651433944702, + "rewards/margins": 0.08907715976238251, + "rewards/rejected": -1.7288423776626587, + "step": 2 + }, + { + "epoch": 0.006342913776015857, + "grad_norm": 0.5377389788627625, + "learning_rate": 6.25e-08, + "logits/chosen": -0.46692028641700745, + "logits/rejected": -0.4649256467819214, + "logps/chosen": -0.9087910652160645, + "logps/rejected": -0.9648240804672241, + "loss": 1.3404, + "rewards/accuracies": 0.5703125, + "rewards/chosen": -1.817582130432129, + "rewards/margins": 0.11206617206335068, + "rewards/rejected": -1.9296481609344482, + "step": 3 + }, + { + "epoch": 0.008457218368021144, + "grad_norm": 0.3221875727176666, + "learning_rate": 8.333333333333333e-08, + "logits/chosen": -0.416828453540802, + "logits/rejected": -0.3584724962711334, + "logps/chosen": -0.7818898558616638, + "logps/rejected": -0.8170815110206604, + "loss": 1.3806, + "rewards/accuracies": 0.484375, + "rewards/chosen": -1.5637797117233276, + "rewards/margins": 0.07038339227437973, + "rewards/rejected": -1.6341630220413208, + "step": 4 + }, + { + "epoch": 0.010571522960026428, + "grad_norm": 0.64655601978302, + "learning_rate": 1.0416666666666667e-07, + "logits/chosen": -0.376886248588562, + "logits/rejected": -0.3516141474246979, + "logps/chosen": -0.8814125061035156, + "logps/rejected": -1.0214396715164185, + "loss": 1.2741, + "rewards/accuracies": 0.625, + "rewards/chosen": -1.7628250122070312, + "rewards/margins": 0.28005433082580566, + "rewards/rejected": -2.042879343032837, + "step": 5 + }, + { + "epoch": 0.012685827552031714, + "grad_norm": 0.4775894582271576, + "learning_rate": 1.25e-07, + "logits/chosen": -0.4757865369319916, + "logits/rejected": -0.4498941898345947, + "logps/chosen": -0.8962199687957764, + "logps/rejected": -0.9462199807167053, + "loss": 1.364, + "rewards/accuracies": 0.5625, + "rewards/chosen": -1.7924399375915527, + "rewards/margins": 0.10000008344650269, + "rewards/rejected": -1.8924399614334106, + "step": 6 + }, + { + "epoch": 0.014800132144037, + "grad_norm": 1.2459568977355957, + "learning_rate": 1.4583333333333335e-07, + "logits/chosen": -0.38895344734191895, + "logits/rejected": -0.38165366649627686, + "logps/chosen": -0.9025766253471375, + "logps/rejected": -0.9465017318725586, + "loss": 1.3898, + "rewards/accuracies": 0.59375, + "rewards/chosen": -1.805153250694275, + "rewards/margins": 0.0878501906991005, + "rewards/rejected": -1.8930034637451172, + "step": 7 + }, + { + "epoch": 0.016914436736042288, + "grad_norm": 0.6195729374885559, + "learning_rate": 1.6666666666666665e-07, + "logits/chosen": -0.3964853286743164, + "logits/rejected": -0.377862811088562, + "logps/chosen": -0.9054160118103027, + "logps/rejected": -0.9605879187583923, + "loss": 1.3821, + "rewards/accuracies": 0.4765625, + "rewards/chosen": -1.8108320236206055, + "rewards/margins": 0.1103438138961792, + "rewards/rejected": -1.9211758375167847, + "step": 8 + }, + { + "epoch": 0.019028741328047574, + "grad_norm": 1.2074137926101685, + "learning_rate": 1.875e-07, + "logits/chosen": -0.3729037344455719, + "logits/rejected": -0.38143450021743774, + "logps/chosen": -0.9328653216362, + "logps/rejected": -0.9905799627304077, + "loss": 1.3754, + "rewards/accuracies": 0.5078125, + "rewards/chosen": -1.8657306432724, + "rewards/margins": 0.11542946100234985, + "rewards/rejected": -1.9811599254608154, + "step": 9 + }, + { + "epoch": 0.021143045920052856, + "grad_norm": 0.2867220640182495, + "learning_rate": 2.0833333333333333e-07, + "logits/chosen": -0.4263336658477783, + "logits/rejected": -0.42903271317481995, + "logps/chosen": -0.8979260325431824, + "logps/rejected": -0.9078099727630615, + "loss": 1.4438, + "rewards/accuracies": 0.515625, + "rewards/chosen": -1.7958520650863647, + "rewards/margins": 0.019767940044403076, + "rewards/rejected": -1.815619945526123, + "step": 10 + }, + { + "epoch": 0.023257350512058142, + "grad_norm": 0.8363026976585388, + "learning_rate": 2.2916666666666663e-07, + "logits/chosen": -0.3374914526939392, + "logits/rejected": -0.32399696111679077, + "logps/chosen": -0.8886098861694336, + "logps/rejected": -0.9484556317329407, + "loss": 1.3422, + "rewards/accuracies": 0.546875, + "rewards/chosen": -1.7772197723388672, + "rewards/margins": 0.11969132721424103, + "rewards/rejected": -1.8969112634658813, + "step": 11 + }, + { + "epoch": 0.025371655104063428, + "grad_norm": 0.5406804084777832, + "learning_rate": 2.5e-07, + "logits/chosen": -0.42844679951667786, + "logits/rejected": -0.37984615564346313, + "logps/chosen": -0.861629843711853, + "logps/rejected": -0.8968492150306702, + "loss": 1.3922, + "rewards/accuracies": 0.5, + "rewards/chosen": -1.723259687423706, + "rewards/margins": 0.07043875753879547, + "rewards/rejected": -1.7936984300613403, + "step": 12 + }, + { + "epoch": 0.027485959696068714, + "grad_norm": 0.9919329285621643, + "learning_rate": 2.708333333333333e-07, + "logits/chosen": -0.36495402455329895, + "logits/rejected": -0.3249490261077881, + "logps/chosen": -0.8502095937728882, + "logps/rejected": -0.8470643758773804, + "loss": 1.4334, + "rewards/accuracies": 0.5078125, + "rewards/chosen": -1.7004191875457764, + "rewards/margins": -0.006290358491241932, + "rewards/rejected": -1.6941287517547607, + "step": 13 + }, + { + "epoch": 0.029600264288074, + "grad_norm": 0.5477162003517151, + "learning_rate": 2.916666666666667e-07, + "logits/chosen": -0.4155704081058502, + "logits/rejected": -0.39535820484161377, + "logps/chosen": -1.0430240631103516, + "logps/rejected": -1.1318373680114746, + "loss": 1.3533, + "rewards/accuracies": 0.6015625, + "rewards/chosen": -2.086048126220703, + "rewards/margins": 0.17762640118598938, + "rewards/rejected": -2.263674736022949, + "step": 14 + }, + { + "epoch": 0.031714568880079286, + "grad_norm": 0.26530712842941284, + "learning_rate": 3.1249999999999997e-07, + "logits/chosen": -0.4810572564601898, + "logits/rejected": -0.42454615235328674, + "logps/chosen": -0.8741041421890259, + "logps/rejected": -0.9494178295135498, + "loss": 1.3655, + "rewards/accuracies": 0.5, + "rewards/chosen": -1.7482082843780518, + "rewards/margins": 0.15062758326530457, + "rewards/rejected": -1.8988356590270996, + "step": 15 + }, + { + "epoch": 0.033828873472084575, + "grad_norm": 0.9272629618644714, + "learning_rate": 3.333333333333333e-07, + "logits/chosen": -0.4440098702907562, + "logits/rejected": -0.3930297791957855, + "logps/chosen": -0.8473359942436218, + "logps/rejected": -0.9369213581085205, + "loss": 1.3248, + "rewards/accuracies": 0.5625, + "rewards/chosen": -1.6946719884872437, + "rewards/margins": 0.17917080223560333, + "rewards/rejected": -1.873842716217041, + "step": 16 + }, + { + "epoch": 0.03594317806408986, + "grad_norm": 0.5912418961524963, + "learning_rate": 3.541666666666667e-07, + "logits/chosen": -0.3838099539279938, + "logits/rejected": -0.3507584035396576, + "logps/chosen": -0.8888350129127502, + "logps/rejected": -0.9361770749092102, + "loss": 1.383, + "rewards/accuracies": 0.4921875, + "rewards/chosen": -1.7776700258255005, + "rewards/margins": 0.0946839451789856, + "rewards/rejected": -1.8723541498184204, + "step": 17 + }, + { + "epoch": 0.03805748265609515, + "grad_norm": 0.6536504030227661, + "learning_rate": 3.75e-07, + "logits/chosen": -0.3581697940826416, + "logits/rejected": -0.3620460629463196, + "logps/chosen": -0.8519617319107056, + "logps/rejected": -0.9022184610366821, + "loss": 1.3841, + "rewards/accuracies": 0.5078125, + "rewards/chosen": -1.7039234638214111, + "rewards/margins": 0.10051343590021133, + "rewards/rejected": -1.8044369220733643, + "step": 18 + }, + { + "epoch": 0.04017178724810043, + "grad_norm": 0.3433632552623749, + "learning_rate": 3.958333333333333e-07, + "logits/chosen": -0.37887442111968994, + "logits/rejected": -0.37543320655822754, + "logps/chosen": -0.9464104175567627, + "logps/rejected": -1.0017329454421997, + "loss": 1.3649, + "rewards/accuracies": 0.5703125, + "rewards/chosen": -1.8928208351135254, + "rewards/margins": 0.11064518243074417, + "rewards/rejected": -2.0034658908843994, + "step": 19 + }, + { + "epoch": 0.04228609184010571, + "grad_norm": 0.9764007329940796, + "learning_rate": 4.1666666666666667e-07, + "logits/chosen": -0.44110679626464844, + "logits/rejected": -0.4280649721622467, + "logps/chosen": -0.9046768546104431, + "logps/rejected": -1.0464633703231812, + "loss": 1.2592, + "rewards/accuracies": 0.625, + "rewards/chosen": -1.8093537092208862, + "rewards/margins": 0.2835729420185089, + "rewards/rejected": -2.0929267406463623, + "step": 20 + }, + { + "epoch": 0.044400396432111, + "grad_norm": 1.8563830852508545, + "learning_rate": 4.375e-07, + "logits/chosen": -0.45183491706848145, + "logits/rejected": -0.42935287952423096, + "logps/chosen": -0.9043138027191162, + "logps/rejected": -0.9462392926216125, + "loss": 1.3784, + "rewards/accuracies": 0.5625, + "rewards/chosen": -1.8086276054382324, + "rewards/margins": 0.08385094255208969, + "rewards/rejected": -1.892478585243225, + "step": 21 + }, + { + "epoch": 0.046514701024116284, + "grad_norm": 1.3473299741744995, + "learning_rate": 4.5833333333333327e-07, + "logits/chosen": -0.37855517864227295, + "logits/rejected": -0.34429043531417847, + "logps/chosen": -0.9284683465957642, + "logps/rejected": -0.9454050064086914, + "loss": 1.4346, + "rewards/accuracies": 0.5234375, + "rewards/chosen": -1.8569366931915283, + "rewards/margins": 0.03387312963604927, + "rewards/rejected": -1.8908100128173828, + "step": 22 + }, + { + "epoch": 0.04862900561612157, + "grad_norm": 0.940831184387207, + "learning_rate": 4.791666666666667e-07, + "logits/chosen": -0.39172160625457764, + "logits/rejected": -0.3695780634880066, + "logps/chosen": -0.9314202666282654, + "logps/rejected": -1.020229697227478, + "loss": 1.3322, + "rewards/accuracies": 0.5703125, + "rewards/chosen": -1.8628405332565308, + "rewards/margins": 0.17761869728565216, + "rewards/rejected": -2.040459394454956, + "step": 23 + }, + { + "epoch": 0.050743310208126856, + "grad_norm": 0.5783158540725708, + "learning_rate": 5e-07, + "logits/chosen": -0.4958629608154297, + "logits/rejected": -0.4257377088069916, + "logps/chosen": -0.9379237294197083, + "logps/rejected": -0.9415461421012878, + "loss": 1.441, + "rewards/accuracies": 0.515625, + "rewards/chosen": -1.8758474588394165, + "rewards/margins": 0.0072449808940291405, + "rewards/rejected": -1.8830922842025757, + "step": 24 + }, + { + "epoch": 0.052857614800132145, + "grad_norm": 1.4209853410720825, + "learning_rate": 5.208333333333334e-07, + "logits/chosen": -0.36407172679901123, + "logits/rejected": -0.3331725299358368, + "logps/chosen": -0.9192589521408081, + "logps/rejected": -0.9595308899879456, + "loss": 1.3994, + "rewards/accuracies": 0.5078125, + "rewards/chosen": -1.8385179042816162, + "rewards/margins": 0.080544114112854, + "rewards/rejected": -1.9190617799758911, + "step": 25 + }, + { + "epoch": 0.05497191939213743, + "grad_norm": 0.6310216188430786, + "learning_rate": 5.416666666666666e-07, + "logits/chosen": -0.41772690415382385, + "logits/rejected": -0.36565953493118286, + "logps/chosen": -0.8052878379821777, + "logps/rejected": -0.8673746585845947, + "loss": 1.3356, + "rewards/accuracies": 0.5859375, + "rewards/chosen": -1.6105756759643555, + "rewards/margins": 0.12417369335889816, + "rewards/rejected": -1.7347493171691895, + "step": 26 + }, + { + "epoch": 0.05708622398414272, + "grad_norm": 1.2933462858200073, + "learning_rate": 5.625e-07, + "logits/chosen": -0.4482795000076294, + "logits/rejected": -0.39409321546554565, + "logps/chosen": -0.8339261412620544, + "logps/rejected": -0.8675202131271362, + "loss": 1.3739, + "rewards/accuracies": 0.4921875, + "rewards/chosen": -1.6678522825241089, + "rewards/margins": 0.06718815863132477, + "rewards/rejected": -1.7350404262542725, + "step": 27 + }, + { + "epoch": 0.059200528576148, + "grad_norm": 0.5808025002479553, + "learning_rate": 5.833333333333334e-07, + "logits/chosen": -0.37116044759750366, + "logits/rejected": -0.3478051722049713, + "logps/chosen": -0.8950318694114685, + "logps/rejected": -0.9756672978401184, + "loss": 1.3505, + "rewards/accuracies": 0.5625, + "rewards/chosen": -1.790063738822937, + "rewards/margins": 0.1612708568572998, + "rewards/rejected": -1.9513345956802368, + "step": 28 + }, + { + "epoch": 0.06131483316815329, + "grad_norm": 1.0569533109664917, + "learning_rate": 6.041666666666666e-07, + "logits/chosen": -0.421148419380188, + "logits/rejected": -0.38443076610565186, + "logps/chosen": -0.8021283745765686, + "logps/rejected": -0.8370179533958435, + "loss": 1.3916, + "rewards/accuracies": 0.46875, + "rewards/chosen": -1.6042567491531372, + "rewards/margins": 0.06977920234203339, + "rewards/rejected": -1.674035906791687, + "step": 29 + }, + { + "epoch": 0.06342913776015857, + "grad_norm": 0.42577147483825684, + "learning_rate": 6.249999999999999e-07, + "logits/chosen": -0.4429818391799927, + "logits/rejected": -0.3524704575538635, + "logps/chosen": -0.8916822671890259, + "logps/rejected": -0.8985542058944702, + "loss": 1.4321, + "rewards/accuracies": 0.5234375, + "rewards/chosen": -1.7833645343780518, + "rewards/margins": 0.01374388113617897, + "rewards/rejected": -1.7971084117889404, + "step": 30 + }, + { + "epoch": 0.06554344235216386, + "grad_norm": 1.0056904554367065, + "learning_rate": 6.458333333333333e-07, + "logits/chosen": -0.376451700925827, + "logits/rejected": -0.342519074678421, + "logps/chosen": -0.9038617014884949, + "logps/rejected": -0.953092634677887, + "loss": 1.398, + "rewards/accuracies": 0.4765625, + "rewards/chosen": -1.8077234029769897, + "rewards/margins": 0.09846188127994537, + "rewards/rejected": -1.906185269355774, + "step": 31 + }, + { + "epoch": 0.06765774694416915, + "grad_norm": 0.5494012236595154, + "learning_rate": 6.666666666666666e-07, + "logits/chosen": -0.3459138870239258, + "logits/rejected": -0.3590989410877228, + "logps/chosen": -0.8274999260902405, + "logps/rejected": -0.8776509761810303, + "loss": 1.363, + "rewards/accuracies": 0.5625, + "rewards/chosen": -1.654999852180481, + "rewards/margins": 0.1003020703792572, + "rewards/rejected": -1.7553019523620605, + "step": 32 + }, + { + "epoch": 0.06977205153617443, + "grad_norm": 0.693267822265625, + "learning_rate": 6.875e-07, + "logits/chosen": -0.40053680539131165, + "logits/rejected": -0.37323904037475586, + "logps/chosen": -0.8255244493484497, + "logps/rejected": -0.8658804893493652, + "loss": 1.3712, + "rewards/accuracies": 0.578125, + "rewards/chosen": -1.6510488986968994, + "rewards/margins": 0.08071210980415344, + "rewards/rejected": -1.7317609786987305, + "step": 33 + }, + { + "epoch": 0.07188635612817972, + "grad_norm": 2.213238000869751, + "learning_rate": 7.083333333333334e-07, + "logits/chosen": -0.40097948908805847, + "logits/rejected": -0.38190510869026184, + "logps/chosen": -0.9122671484947205, + "logps/rejected": -0.9549552798271179, + "loss": 1.36, + "rewards/accuracies": 0.5390625, + "rewards/chosen": -1.824534296989441, + "rewards/margins": 0.0853763073682785, + "rewards/rejected": -1.9099105596542358, + "step": 34 + }, + { + "epoch": 0.074000660720185, + "grad_norm": 0.6859830021858215, + "learning_rate": 7.291666666666666e-07, + "logits/chosen": -0.42501094937324524, + "logits/rejected": -0.42549416422843933, + "logps/chosen": -1.0008373260498047, + "logps/rejected": -1.1157118082046509, + "loss": 1.3294, + "rewards/accuracies": 0.5546875, + "rewards/chosen": -2.0016746520996094, + "rewards/margins": 0.22974897921085358, + "rewards/rejected": -2.2314236164093018, + "step": 35 + }, + { + "epoch": 0.0761149653121903, + "grad_norm": 0.6468721628189087, + "learning_rate": 7.5e-07, + "logits/chosen": -0.36494994163513184, + "logits/rejected": -0.30433908104896545, + "logps/chosen": -0.9062094688415527, + "logps/rejected": -0.920263409614563, + "loss": 1.4312, + "rewards/accuracies": 0.4921875, + "rewards/chosen": -1.8124189376831055, + "rewards/margins": 0.02810765616595745, + "rewards/rejected": -1.840526819229126, + "step": 36 + }, + { + "epoch": 0.07822926990419557, + "grad_norm": 0.5085556507110596, + "learning_rate": 7.708333333333333e-07, + "logits/chosen": -0.4677881598472595, + "logits/rejected": -0.456132709980011, + "logps/chosen": -1.0101865530014038, + "logps/rejected": -1.0429682731628418, + "loss": 1.4132, + "rewards/accuracies": 0.5234375, + "rewards/chosen": -2.0203731060028076, + "rewards/margins": 0.06556359678506851, + "rewards/rejected": -2.0859365463256836, + "step": 37 + }, + { + "epoch": 0.08034357449620086, + "grad_norm": 0.23813335597515106, + "learning_rate": 7.916666666666666e-07, + "logits/chosen": -0.3991190791130066, + "logits/rejected": -0.3664044141769409, + "logps/chosen": -0.9578174352645874, + "logps/rejected": -0.9229263067245483, + "loss": 1.4824, + "rewards/accuracies": 0.4609375, + "rewards/chosen": -1.9156348705291748, + "rewards/margins": -0.06978224962949753, + "rewards/rejected": -1.8458526134490967, + "step": 38 + }, + { + "epoch": 0.08245787908820615, + "grad_norm": 0.587037980556488, + "learning_rate": 8.125e-07, + "logits/chosen": -0.37554049491882324, + "logits/rejected": -0.36305734515190125, + "logps/chosen": -0.8503091931343079, + "logps/rejected": -0.864615261554718, + "loss": 1.4086, + "rewards/accuracies": 0.4296875, + "rewards/chosen": -1.7006183862686157, + "rewards/margins": 0.028611989691853523, + "rewards/rejected": -1.729230523109436, + "step": 39 + }, + { + "epoch": 0.08457218368021142, + "grad_norm": 0.4172501862049103, + "learning_rate": 8.333333333333333e-07, + "logits/chosen": -0.4405443072319031, + "logits/rejected": -0.41723060607910156, + "logps/chosen": -0.8502858877182007, + "logps/rejected": -0.9114271402359009, + "loss": 1.3446, + "rewards/accuracies": 0.546875, + "rewards/chosen": -1.7005717754364014, + "rewards/margins": 0.12228240817785263, + "rewards/rejected": -1.8228542804718018, + "step": 40 + }, + { + "epoch": 0.08668648827221671, + "grad_norm": 0.9275372624397278, + "learning_rate": 8.541666666666666e-07, + "logits/chosen": -0.4200601577758789, + "logits/rejected": -0.3478623628616333, + "logps/chosen": -0.892408013343811, + "logps/rejected": -0.9276402592658997, + "loss": 1.3887, + "rewards/accuracies": 0.46875, + "rewards/chosen": -1.784816026687622, + "rewards/margins": 0.07046431303024292, + "rewards/rejected": -1.8552805185317993, + "step": 41 + }, + { + "epoch": 0.088800792864222, + "grad_norm": 0.7317383289337158, + "learning_rate": 8.75e-07, + "logits/chosen": -0.37675267457962036, + "logits/rejected": -0.33540332317352295, + "logps/chosen": -0.7866061925888062, + "logps/rejected": -0.824250340461731, + "loss": 1.3837, + "rewards/accuracies": 0.5078125, + "rewards/chosen": -1.5732123851776123, + "rewards/margins": 0.07528844475746155, + "rewards/rejected": -1.648500680923462, + "step": 42 + }, + { + "epoch": 0.09091509745622729, + "grad_norm": 0.9452736973762512, + "learning_rate": 8.958333333333334e-07, + "logits/chosen": -0.4662383198738098, + "logits/rejected": -0.4447881579399109, + "logps/chosen": -0.9490666389465332, + "logps/rejected": -1.0112388134002686, + "loss": 1.3412, + "rewards/accuracies": 0.5390625, + "rewards/chosen": -1.8981332778930664, + "rewards/margins": 0.12434446066617966, + "rewards/rejected": -2.022477626800537, + "step": 43 + }, + { + "epoch": 0.09302940204823257, + "grad_norm": 0.2848323881626129, + "learning_rate": 9.166666666666665e-07, + "logits/chosen": -0.41404005885124207, + "logits/rejected": -0.3944583535194397, + "logps/chosen": -0.8224930167198181, + "logps/rejected": -0.8416361808776855, + "loss": 1.4027, + "rewards/accuracies": 0.609375, + "rewards/chosen": -1.6449860334396362, + "rewards/margins": 0.038286346942186356, + "rewards/rejected": -1.683272361755371, + "step": 44 + }, + { + "epoch": 0.09514370664023786, + "grad_norm": 0.7165678143501282, + "learning_rate": 9.374999999999999e-07, + "logits/chosen": -0.40475326776504517, + "logits/rejected": -0.3559921383857727, + "logps/chosen": -0.8070214986801147, + "logps/rejected": -0.8993593454360962, + "loss": 1.3148, + "rewards/accuracies": 0.53125, + "rewards/chosen": -1.6140429973602295, + "rewards/margins": 0.18467575311660767, + "rewards/rejected": -1.7987186908721924, + "step": 45 + }, + { + "epoch": 0.09725801123224315, + "grad_norm": 0.4779021739959717, + "learning_rate": 9.583333333333334e-07, + "logits/chosen": -0.4171525835990906, + "logits/rejected": -0.42166149616241455, + "logps/chosen": -0.7872560024261475, + "logps/rejected": -0.8496187925338745, + "loss": 1.3356, + "rewards/accuracies": 0.5546875, + "rewards/chosen": -1.574512004852295, + "rewards/margins": 0.12472567707300186, + "rewards/rejected": -1.699237585067749, + "step": 46 + }, + { + "epoch": 0.09937231582424844, + "grad_norm": 0.7870219349861145, + "learning_rate": 9.791666666666667e-07, + "logits/chosen": -0.3734116554260254, + "logits/rejected": -0.32778748869895935, + "logps/chosen": -0.7842286825180054, + "logps/rejected": -0.8161548972129822, + "loss": 1.3647, + "rewards/accuracies": 0.5078125, + "rewards/chosen": -1.5684573650360107, + "rewards/margins": 0.06385258585214615, + "rewards/rejected": -1.6323097944259644, + "step": 47 + }, + { + "epoch": 0.10148662041625371, + "grad_norm": 0.2597256600856781, + "learning_rate": 1e-06, + "logits/chosen": -0.4355677664279938, + "logits/rejected": -0.38983187079429626, + "logps/chosen": -0.8787693977355957, + "logps/rejected": -0.9383041262626648, + "loss": 1.35, + "rewards/accuracies": 0.5546875, + "rewards/chosen": -1.7575387954711914, + "rewards/margins": 0.11906948685646057, + "rewards/rejected": -1.8766082525253296, + "step": 48 + }, + { + "epoch": 0.103600925008259, + "grad_norm": 0.9942799210548401, + "learning_rate": 9.999862751990697e-07, + "logits/chosen": -0.4244321882724762, + "logits/rejected": -0.4366786777973175, + "logps/chosen": -0.7910157442092896, + "logps/rejected": -0.8630884885787964, + "loss": 1.3166, + "rewards/accuracies": 0.5546875, + "rewards/chosen": -1.582031488418579, + "rewards/margins": 0.14414538443088531, + "rewards/rejected": -1.7261769771575928, + "step": 49 + }, + { + "epoch": 0.10571522960026429, + "grad_norm": 0.5333903431892395, + "learning_rate": 9.999451015497595e-07, + "logits/chosen": -0.389942467212677, + "logits/rejected": -0.36674585938453674, + "logps/chosen": -0.7312074899673462, + "logps/rejected": -0.7289648652076721, + "loss": 1.4225, + "rewards/accuracies": 0.4765625, + "rewards/chosen": -1.4624149799346924, + "rewards/margins": -0.004485193639993668, + "rewards/rejected": -1.4579297304153442, + "step": 50 + }, + { + "epoch": 0.10782953419226958, + "grad_norm": 0.5712242722511292, + "learning_rate": 9.9987648131247e-07, + "logits/chosen": -0.4622853994369507, + "logits/rejected": -0.3728552460670471, + "logps/chosen": -0.8764299750328064, + "logps/rejected": -0.869678795337677, + "loss": 1.4542, + "rewards/accuracies": 0.53125, + "rewards/chosen": -1.7528599500656128, + "rewards/margins": -0.013502337038516998, + "rewards/rejected": -1.739357590675354, + "step": 51 + }, + { + "epoch": 0.10994383878427486, + "grad_norm": 0.2586441934108734, + "learning_rate": 9.99780418254397e-07, + "logits/chosen": -0.37249019742012024, + "logits/rejected": -0.3998304605484009, + "logps/chosen": -0.8435611724853516, + "logps/rejected": -0.9359882473945618, + "loss": 1.3057, + "rewards/accuracies": 0.625, + "rewards/chosen": -1.6871223449707031, + "rewards/margins": 0.18485431373119354, + "rewards/rejected": -1.8719764947891235, + "step": 52 + }, + { + "epoch": 0.11205814337628014, + "grad_norm": 1.0829113721847534, + "learning_rate": 9.996569176493268e-07, + "logits/chosen": -0.47697725892066956, + "logits/rejected": -0.4208195209503174, + "logps/chosen": -0.8014968037605286, + "logps/rejected": -0.8703804612159729, + "loss": 1.3523, + "rewards/accuracies": 0.5703125, + "rewards/chosen": -1.6029936075210571, + "rewards/margins": 0.1377674788236618, + "rewards/rejected": -1.7407609224319458, + "step": 53 + }, + { + "epoch": 0.11417244796828543, + "grad_norm": 0.5523208379745483, + "learning_rate": 9.995059862773438e-07, + "logits/chosen": -0.40533363819122314, + "logits/rejected": -0.36801978945732117, + "logps/chosen": -0.7641825675964355, + "logps/rejected": -0.8168596029281616, + "loss": 1.3692, + "rewards/accuracies": 0.53125, + "rewards/chosen": -1.528365135192871, + "rewards/margins": 0.10535416752099991, + "rewards/rejected": -1.6337192058563232, + "step": 54 + }, + { + "epoch": 0.11628675256029072, + "grad_norm": 0.614101767539978, + "learning_rate": 9.993276324244605e-07, + "logits/chosen": -0.4476906955242157, + "logits/rejected": -0.40396648645401, + "logps/chosen": -0.8706808090209961, + "logps/rejected": -0.9221430420875549, + "loss": 1.3787, + "rewards/accuracies": 0.515625, + "rewards/chosen": -1.7413616180419922, + "rewards/margins": 0.10292442888021469, + "rewards/rejected": -1.8442860841751099, + "step": 55 + }, + { + "epoch": 0.118401057152296, + "grad_norm": 0.3428778052330017, + "learning_rate": 9.991218658821608e-07, + "logits/chosen": -0.31709593534469604, + "logits/rejected": -0.2760937213897705, + "logps/chosen": -0.842248797416687, + "logps/rejected": -0.8068034648895264, + "loss": 1.498, + "rewards/accuracies": 0.4375, + "rewards/chosen": -1.684497594833374, + "rewards/margins": -0.07089066505432129, + "rewards/rejected": -1.6136069297790527, + "step": 56 + }, + { + "epoch": 0.12051536174430129, + "grad_norm": 0.6877723932266235, + "learning_rate": 9.988886979468643e-07, + "logits/chosen": -0.41800016164779663, + "logits/rejected": -0.4011584222316742, + "logps/chosen": -0.7845420837402344, + "logps/rejected": -0.834447979927063, + "loss": 1.3491, + "rewards/accuracies": 0.546875, + "rewards/chosen": -1.5690841674804688, + "rewards/margins": 0.09981165081262589, + "rewards/rejected": -1.668895959854126, + "step": 57 + }, + { + "epoch": 0.12262966633630658, + "grad_norm": 0.9649701714515686, + "learning_rate": 9.98628141419305e-07, + "logits/chosen": -0.4253537058830261, + "logits/rejected": -0.4305458962917328, + "logps/chosen": -0.86476731300354, + "logps/rejected": -0.9080386161804199, + "loss": 1.3639, + "rewards/accuracies": 0.515625, + "rewards/chosen": -1.72953462600708, + "rewards/margins": 0.08654248714447021, + "rewards/rejected": -1.8160772323608398, + "step": 58 + }, + { + "epoch": 0.12474397092831185, + "grad_norm": 1.3779780864715576, + "learning_rate": 9.98340210603829e-07, + "logits/chosen": -0.39970022439956665, + "logits/rejected": -0.441428005695343, + "logps/chosen": -0.8662775158882141, + "logps/rejected": -0.9646260738372803, + "loss": 1.3001, + "rewards/accuracies": 0.6484375, + "rewards/chosen": -1.7325550317764282, + "rewards/margins": 0.19669723510742188, + "rewards/rejected": -1.9292521476745605, + "step": 59 + }, + { + "epoch": 0.12685827552031714, + "grad_norm": 0.5366966724395752, + "learning_rate": 9.980249213076084e-07, + "logits/chosen": -0.37770116329193115, + "logits/rejected": -0.35231757164001465, + "logps/chosen": -0.8165755867958069, + "logps/rejected": -0.8619179129600525, + "loss": 1.3699, + "rewards/accuracies": 0.578125, + "rewards/chosen": -1.6331511735916138, + "rewards/margins": 0.09068439900875092, + "rewards/rejected": -1.723835825920105, + "step": 60 + }, + { + "epoch": 0.12897258011232243, + "grad_norm": 0.36810922622680664, + "learning_rate": 9.976822908397748e-07, + "logits/chosen": -0.4224976897239685, + "logits/rejected": -0.41758257150650024, + "logps/chosen": -0.8445641994476318, + "logps/rejected": -0.9393664598464966, + "loss": 1.3193, + "rewards/accuracies": 0.5390625, + "rewards/chosen": -1.6891283988952637, + "rewards/margins": 0.18960458040237427, + "rewards/rejected": -1.8787329196929932, + "step": 61 + }, + { + "epoch": 0.13108688470432772, + "grad_norm": 0.6838279366493225, + "learning_rate": 9.97312338010468e-07, + "logits/chosen": -0.4168627858161926, + "logits/rejected": -0.36115381121635437, + "logps/chosen": -0.8370552659034729, + "logps/rejected": -0.8352169394493103, + "loss": 1.4284, + "rewards/accuracies": 0.5703125, + "rewards/chosen": -1.6741105318069458, + "rewards/margins": -0.0036766715347766876, + "rewards/rejected": -1.6704338788986206, + "step": 62 + }, + { + "epoch": 0.133201189296333, + "grad_norm": 0.39330533146858215, + "learning_rate": 9.969150831298037e-07, + "logits/chosen": -0.4558233618736267, + "logits/rejected": -0.4025765061378479, + "logps/chosen": -0.826255738735199, + "logps/rejected": -0.894213080406189, + "loss": 1.3485, + "rewards/accuracies": 0.5, + "rewards/chosen": -1.652511477470398, + "rewards/margins": 0.13591471314430237, + "rewards/rejected": -1.788426160812378, + "step": 63 + }, + { + "epoch": 0.1353154938883383, + "grad_norm": 0.6055929660797119, + "learning_rate": 9.964905480067584e-07, + "logits/chosen": -0.459463506937027, + "logits/rejected": -0.42943331599235535, + "logps/chosen": -0.7901928424835205, + "logps/rejected": -0.7964221239089966, + "loss": 1.4057, + "rewards/accuracies": 0.484375, + "rewards/chosen": -1.580385684967041, + "rewards/margins": 0.012458762153983116, + "rewards/rejected": -1.5928442478179932, + "step": 64 + }, + { + "epoch": 0.13742979848034356, + "grad_norm": 0.37883859872817993, + "learning_rate": 9.960387559479725e-07, + "logits/chosen": -0.4447207450866699, + "logits/rejected": -0.371269553899765, + "logps/chosen": -0.7863065004348755, + "logps/rejected": -0.7983666658401489, + "loss": 1.4202, + "rewards/accuracies": 0.4609375, + "rewards/chosen": -1.572613000869751, + "rewards/margins": 0.02412020042538643, + "rewards/rejected": -1.5967333316802979, + "step": 65 + }, + { + "epoch": 0.13954410307234885, + "grad_norm": 0.31330156326293945, + "learning_rate": 9.955597317564703e-07, + "logits/chosen": -0.42059677839279175, + "logits/rejected": -0.37605100870132446, + "logps/chosen": -0.7669360637664795, + "logps/rejected": -0.8348797559738159, + "loss": 1.3368, + "rewards/accuracies": 0.5390625, + "rewards/chosen": -1.533872127532959, + "rewards/margins": 0.13588732481002808, + "rewards/rejected": -1.6697595119476318, + "step": 66 + }, + { + "epoch": 0.14165840766435414, + "grad_norm": 0.4353170096874237, + "learning_rate": 9.950535017302983e-07, + "logits/chosen": -0.3897082805633545, + "logits/rejected": -0.38229796290397644, + "logps/chosen": -0.7249190807342529, + "logps/rejected": -0.7696882486343384, + "loss": 1.3511, + "rewards/accuracies": 0.5546875, + "rewards/chosen": -1.4498381614685059, + "rewards/margins": 0.08953814208507538, + "rewards/rejected": -1.5393764972686768, + "step": 67 + }, + { + "epoch": 0.14377271225635943, + "grad_norm": 0.6724106669425964, + "learning_rate": 9.94520093661082e-07, + "logits/chosen": -0.3687596023082733, + "logits/rejected": -0.34222811460494995, + "logps/chosen": -0.7845972776412964, + "logps/rejected": -0.8308086395263672, + "loss": 1.3737, + "rewards/accuracies": 0.5, + "rewards/chosen": -1.5691945552825928, + "rewards/margins": 0.09242270141839981, + "rewards/rejected": -1.6616172790527344, + "step": 68 + }, + { + "epoch": 0.14588701684836472, + "grad_norm": 0.7312172651290894, + "learning_rate": 9.939595368324994e-07, + "logits/chosen": -0.4475817382335663, + "logits/rejected": -0.3975730538368225, + "logps/chosen": -0.7314785718917847, + "logps/rejected": -0.7924487590789795, + "loss": 1.3439, + "rewards/accuracies": 0.5390625, + "rewards/chosen": -1.4629571437835693, + "rewards/margins": 0.12194043397903442, + "rewards/rejected": -1.584897518157959, + "step": 69 + }, + { + "epoch": 0.14800132144037, + "grad_norm": 0.301097571849823, + "learning_rate": 9.933718620186744e-07, + "logits/chosen": -0.402032732963562, + "logits/rejected": -0.3640722632408142, + "logps/chosen": -0.7727882862091064, + "logps/rejected": -0.8291516304016113, + "loss": 1.358, + "rewards/accuracies": 0.5546875, + "rewards/chosen": -1.545576572418213, + "rewards/margins": 0.11272668838500977, + "rewards/rejected": -1.6583032608032227, + "step": 70 + }, + { + "epoch": 0.1501156260323753, + "grad_norm": 0.377835750579834, + "learning_rate": 9.92757101482486e-07, + "logits/chosen": -0.316825270652771, + "logits/rejected": -0.3245603144168854, + "logps/chosen": -0.7962774634361267, + "logps/rejected": -0.8610175848007202, + "loss": 1.3464, + "rewards/accuracies": 0.5859375, + "rewards/chosen": -1.5925549268722534, + "rewards/margins": 0.12948019802570343, + "rewards/rejected": -1.7220351696014404, + "step": 71 + }, + { + "epoch": 0.1522299306243806, + "grad_norm": 0.84058678150177, + "learning_rate": 9.921152889737984e-07, + "logits/chosen": -0.4446060359477997, + "logits/rejected": -0.43160340189933777, + "logps/chosen": -0.7745426297187805, + "logps/rejected": -0.8286185264587402, + "loss": 1.3408, + "rewards/accuracies": 0.5703125, + "rewards/chosen": -1.549085259437561, + "rewards/margins": 0.10815180093050003, + "rewards/rejected": -1.6572370529174805, + "step": 72 + }, + { + "epoch": 0.15434423521638585, + "grad_norm": 0.6970808506011963, + "learning_rate": 9.91446459727607e-07, + "logits/chosen": -0.4220297634601593, + "logits/rejected": -0.404453307390213, + "logps/chosen": -0.8769615888595581, + "logps/rejected": -0.9564313292503357, + "loss": 1.3423, + "rewards/accuracies": 0.6328125, + "rewards/chosen": -1.7539231777191162, + "rewards/margins": 0.15893957018852234, + "rewards/rejected": -1.9128626585006714, + "step": 73 + }, + { + "epoch": 0.15645853980839114, + "grad_norm": 0.5900676250457764, + "learning_rate": 9.90750650462105e-07, + "logits/chosen": -0.41884100437164307, + "logits/rejected": -0.38551777601242065, + "logps/chosen": -0.814996063709259, + "logps/rejected": -0.8892688751220703, + "loss": 1.3325, + "rewards/accuracies": 0.5546875, + "rewards/chosen": -1.629992127418518, + "rewards/margins": 0.14854571223258972, + "rewards/rejected": -1.7785377502441406, + "step": 74 + }, + { + "epoch": 0.15857284440039643, + "grad_norm": 0.7245749831199646, + "learning_rate": 9.900278993766668e-07, + "logits/chosen": -0.3451727330684662, + "logits/rejected": -0.3348972201347351, + "logps/chosen": -0.8788102865219116, + "logps/rejected": -0.9273182153701782, + "loss": 1.377, + "rewards/accuracies": 0.5078125, + "rewards/chosen": -1.7576205730438232, + "rewards/margins": 0.09701582789421082, + "rewards/rejected": -1.8546364307403564, + "step": 75 + }, + { + "epoch": 0.16068714899240172, + "grad_norm": 0.3135383725166321, + "learning_rate": 9.89278246149752e-07, + "logits/chosen": -0.4140404760837555, + "logits/rejected": -0.38082340359687805, + "logps/chosen": -0.7513999342918396, + "logps/rejected": -0.7880118489265442, + "loss": 1.3921, + "rewards/accuracies": 0.53125, + "rewards/chosen": -1.5027998685836792, + "rewards/margins": 0.07322371751070023, + "rewards/rejected": -1.5760236978530884, + "step": 76 + }, + { + "epoch": 0.162801453584407, + "grad_norm": 0.8594076633453369, + "learning_rate": 9.885017319367252e-07, + "logits/chosen": -0.35951656103134155, + "logits/rejected": -0.30456626415252686, + "logps/chosen": -0.7989844679832458, + "logps/rejected": -0.8145395517349243, + "loss": 1.4061, + "rewards/accuracies": 0.4765625, + "rewards/chosen": -1.5979689359664917, + "rewards/margins": 0.031110182404518127, + "rewards/rejected": -1.6290791034698486, + "step": 77 + }, + { + "epoch": 0.1649157581764123, + "grad_norm": 0.4393538534641266, + "learning_rate": 9.876983993675989e-07, + "logits/chosen": -0.33119240403175354, + "logits/rejected": -0.31970253586769104, + "logps/chosen": -0.721772313117981, + "logps/rejected": -0.7868390679359436, + "loss": 1.3325, + "rewards/accuracies": 0.5390625, + "rewards/chosen": -1.443544626235962, + "rewards/margins": 0.13013358414173126, + "rewards/rejected": -1.5736781358718872, + "step": 78 + }, + { + "epoch": 0.1670300627684176, + "grad_norm": 0.8017925024032593, + "learning_rate": 9.868682925446909e-07, + "logits/chosen": -0.3608989417552948, + "logits/rejected": -0.3497124910354614, + "logps/chosen": -0.8117240071296692, + "logps/rejected": -0.8506529331207275, + "loss": 1.3671, + "rewards/accuracies": 0.53125, + "rewards/chosen": -1.6234480142593384, + "rewards/margins": 0.07785768806934357, + "rewards/rejected": -1.701305866241455, + "step": 79 + }, + { + "epoch": 0.16914436736042285, + "grad_norm": 0.5704994201660156, + "learning_rate": 9.860114570402054e-07, + "logits/chosen": -0.37441548705101013, + "logits/rejected": -0.32188406586647034, + "logps/chosen": -0.745419442653656, + "logps/rejected": -0.816170871257782, + "loss": 1.3315, + "rewards/accuracies": 0.484375, + "rewards/chosen": -1.490838885307312, + "rewards/margins": 0.1415030062198639, + "rewards/rejected": -1.632341742515564, + "step": 80 + }, + { + "epoch": 0.17125867195242814, + "grad_norm": 3.0005106925964355, + "learning_rate": 9.85127939893729e-07, + "logits/chosen": -0.34791454672813416, + "logits/rejected": -0.32542383670806885, + "logps/chosen": -0.7547991275787354, + "logps/rejected": -0.7868378162384033, + "loss": 1.3742, + "rewards/accuracies": 0.515625, + "rewards/chosen": -1.5095982551574707, + "rewards/margins": 0.06407731771469116, + "rewards/rejected": -1.5736756324768066, + "step": 81 + }, + { + "epoch": 0.17337297654443343, + "grad_norm": 0.5891271829605103, + "learning_rate": 9.842177896096493e-07, + "logits/chosen": -0.38649702072143555, + "logits/rejected": -0.36892226338386536, + "logps/chosen": -0.7556143999099731, + "logps/rejected": -0.82858806848526, + "loss": 1.3233, + "rewards/accuracies": 0.5703125, + "rewards/chosen": -1.5112287998199463, + "rewards/margins": 0.14594702422618866, + "rewards/rejected": -1.65717613697052, + "step": 82 + }, + { + "epoch": 0.17548728113643872, + "grad_norm": 0.27861157059669495, + "learning_rate": 9.832810561544923e-07, + "logits/chosen": -0.38264670968055725, + "logits/rejected": -0.35908499360084534, + "logps/chosen": -0.7858557105064392, + "logps/rejected": -0.8571599721908569, + "loss": 1.3234, + "rewards/accuracies": 0.578125, + "rewards/chosen": -1.5717114210128784, + "rewards/margins": 0.1426086127758026, + "rewards/rejected": -1.7143199443817139, + "step": 83 + }, + { + "epoch": 0.177601585728444, + "grad_norm": 0.8158763647079468, + "learning_rate": 9.823177909541793e-07, + "logits/chosen": -0.4076104760169983, + "logits/rejected": -0.3934200704097748, + "logps/chosen": -0.8089872002601624, + "logps/rejected": -0.8885407447814941, + "loss": 1.3476, + "rewards/accuracies": 0.5390625, + "rewards/chosen": -1.6179744005203247, + "rewards/margins": 0.15910708904266357, + "rewards/rejected": -1.7770814895629883, + "step": 84 + }, + { + "epoch": 0.1797158903204493, + "grad_norm": 0.46008333563804626, + "learning_rate": 9.813280468912022e-07, + "logits/chosen": -0.33124151825904846, + "logits/rejected": -0.34535717964172363, + "logps/chosen": -0.733020544052124, + "logps/rejected": -0.8716557621955872, + "loss": 1.2807, + "rewards/accuracies": 0.5859375, + "rewards/chosen": -1.466041088104248, + "rewards/margins": 0.27727028727531433, + "rewards/rejected": -1.7433115243911743, + "step": 85 + }, + { + "epoch": 0.18183019491245458, + "grad_norm": 0.3784334659576416, + "learning_rate": 9.80311878301722e-07, + "logits/chosen": -0.40713849663734436, + "logits/rejected": -0.3808574080467224, + "logps/chosen": -0.7063947319984436, + "logps/rejected": -0.7589148879051208, + "loss": 1.3501, + "rewards/accuracies": 0.515625, + "rewards/chosen": -1.4127894639968872, + "rewards/margins": 0.10504024475812912, + "rewards/rejected": -1.5178297758102417, + "step": 86 + }, + { + "epoch": 0.18394449950445987, + "grad_norm": 0.6137813329696655, + "learning_rate": 9.792693409725853e-07, + "logits/chosen": -0.4119255542755127, + "logits/rejected": -0.44221603870391846, + "logps/chosen": -0.795850932598114, + "logps/rejected": -0.8925026059150696, + "loss": 1.2987, + "rewards/accuracies": 0.5859375, + "rewards/chosen": -1.591701865196228, + "rewards/margins": 0.19330324232578278, + "rewards/rejected": -1.7850052118301392, + "step": 87 + }, + { + "epoch": 0.18605880409646514, + "grad_norm": 0.5354250073432922, + "learning_rate": 9.78200492138261e-07, + "logits/chosen": -0.3792279064655304, + "logits/rejected": -0.3789527714252472, + "logps/chosen": -0.7249161005020142, + "logps/rejected": -0.8088154792785645, + "loss": 1.3304, + "rewards/accuracies": 0.515625, + "rewards/chosen": -1.4498322010040283, + "rewards/margins": 0.16779886186122894, + "rewards/rejected": -1.617630958557129, + "step": 88 + }, + { + "epoch": 0.18817310868847043, + "grad_norm": 0.6911243796348572, + "learning_rate": 9.771053904776995e-07, + "logits/chosen": -0.38837429881095886, + "logits/rejected": -0.36597418785095215, + "logps/chosen": -0.7528612017631531, + "logps/rejected": -0.7981135249137878, + "loss": 1.3481, + "rewards/accuracies": 0.6171875, + "rewards/chosen": -1.5057224035263062, + "rewards/margins": 0.09050464630126953, + "rewards/rejected": -1.5962270498275757, + "step": 89 + }, + { + "epoch": 0.19028741328047571, + "grad_norm": 0.37110790610313416, + "learning_rate": 9.759840961111097e-07, + "logits/chosen": -0.3804919421672821, + "logits/rejected": -0.38750600814819336, + "logps/chosen": -0.8673248291015625, + "logps/rejected": -0.9381619691848755, + "loss": 1.3303, + "rewards/accuracies": 0.5859375, + "rewards/chosen": -1.734649658203125, + "rewards/margins": 0.14167429506778717, + "rewards/rejected": -1.876323938369751, + "step": 90 + }, + { + "epoch": 0.192401717872481, + "grad_norm": 0.8033086657524109, + "learning_rate": 9.748366705966593e-07, + "logits/chosen": -0.3804866075515747, + "logits/rejected": -0.31055447459220886, + "logps/chosen": -0.7535511255264282, + "logps/rejected": -0.7824290990829468, + "loss": 1.3706, + "rewards/accuracies": 0.546875, + "rewards/chosen": -1.5071022510528564, + "rewards/margins": 0.057755980640649796, + "rewards/rejected": -1.5648581981658936, + "step": 91 + }, + { + "epoch": 0.1945160224644863, + "grad_norm": 0.6934167742729187, + "learning_rate": 9.736631769270957e-07, + "logits/chosen": -0.443461149930954, + "logits/rejected": -0.4398806691169739, + "logps/chosen": -0.8123858571052551, + "logps/rejected": -0.8972252607345581, + "loss": 1.3464, + "rewards/accuracies": 0.578125, + "rewards/chosen": -1.6247717142105103, + "rewards/margins": 0.16967862844467163, + "rewards/rejected": -1.7944505214691162, + "step": 92 + }, + { + "epoch": 0.19663032705649158, + "grad_norm": 1.140067458152771, + "learning_rate": 9.724636795262866e-07, + "logits/chosen": -0.43793433904647827, + "logits/rejected": -0.4402340352535248, + "logps/chosen": -0.8155819177627563, + "logps/rejected": -0.8659977912902832, + "loss": 1.3621, + "rewards/accuracies": 0.5, + "rewards/chosen": -1.6311638355255127, + "rewards/margins": 0.10083187371492386, + "rewards/rejected": -1.7319955825805664, + "step": 93 + }, + { + "epoch": 0.19874463164849687, + "grad_norm": 1.8303897380828857, + "learning_rate": 9.712382442456844e-07, + "logits/chosen": -0.34288379549980164, + "logits/rejected": -0.36632782220840454, + "logps/chosen": -0.7338054776191711, + "logps/rejected": -0.8537961840629578, + "loss": 1.2942, + "rewards/accuracies": 0.5859375, + "rewards/chosen": -1.4676109552383423, + "rewards/margins": 0.23998141288757324, + "rewards/rejected": -1.7075923681259155, + "step": 94 + }, + { + "epoch": 0.20085893624050213, + "grad_norm": 0.34392252564430237, + "learning_rate": 9.6998693836071e-07, + "logits/chosen": -0.4381723999977112, + "logits/rejected": -0.4031081199645996, + "logps/chosen": -0.7130292057991028, + "logps/rejected": -0.7402217388153076, + "loss": 1.3766, + "rewards/accuracies": 0.5859375, + "rewards/chosen": -1.4260584115982056, + "rewards/margins": 0.0543849840760231, + "rewards/rejected": -1.4804434776306152, + "step": 95 + }, + { + "epoch": 0.20297324083250742, + "grad_norm": 0.4129842519760132, + "learning_rate": 9.687098305670604e-07, + "logits/chosen": -0.39796924591064453, + "logits/rejected": -0.3476859927177429, + "logps/chosen": -0.7520885467529297, + "logps/rejected": -0.8058558702468872, + "loss": 1.3663, + "rewards/accuracies": 0.53125, + "rewards/chosen": -1.5041770935058594, + "rewards/margins": 0.10753461718559265, + "rewards/rejected": -1.6117117404937744, + "step": 96 + }, + { + "epoch": 0.2050875454245127, + "grad_norm": 0.5054985284805298, + "learning_rate": 9.674069909769362e-07, + "logits/chosen": -0.3942393660545349, + "logits/rejected": -0.3627544045448303, + "logps/chosen": -0.733702540397644, + "logps/rejected": -0.781308650970459, + "loss": 1.342, + "rewards/accuracies": 0.5859375, + "rewards/chosen": -1.467405080795288, + "rewards/margins": 0.09521210938692093, + "rewards/rejected": -1.562617301940918, + "step": 97 + }, + { + "epoch": 0.207201850016518, + "grad_norm": 0.6975870728492737, + "learning_rate": 9.66078491115194e-07, + "logits/chosen": -0.38557127118110657, + "logits/rejected": -0.3581204414367676, + "logps/chosen": -0.7359838485717773, + "logps/rejected": -0.7648134827613831, + "loss": 1.3841, + "rewards/accuracies": 0.5234375, + "rewards/chosen": -1.4719676971435547, + "rewards/margins": 0.057659298181533813, + "rewards/rejected": -1.5296269655227661, + "step": 98 + }, + { + "epoch": 0.2093161546085233, + "grad_norm": 0.7575029730796814, + "learning_rate": 9.647244039154177e-07, + "logits/chosen": -0.3871467411518097, + "logits/rejected": -0.3941374123096466, + "logps/chosen": -0.6516871452331543, + "logps/rejected": -0.7066073417663574, + "loss": 1.3364, + "rewards/accuracies": 0.5390625, + "rewards/chosen": -1.3033742904663086, + "rewards/margins": 0.10984040796756744, + "rewards/rejected": -1.4132146835327148, + "step": 99 + }, + { + "epoch": 0.21143045920052858, + "grad_norm": 1.3344250917434692, + "learning_rate": 9.633448037159166e-07, + "logits/chosen": -0.40887755155563354, + "logits/rejected": -0.41733911633491516, + "logps/chosen": -0.6978950500488281, + "logps/rejected": -0.793424129486084, + "loss": 1.3076, + "rewards/accuracies": 0.609375, + "rewards/chosen": -1.3957901000976562, + "rewards/margins": 0.1910584717988968, + "rewards/rejected": -1.586848258972168, + "step": 100 + }, + { + "epoch": 0.21354476379253387, + "grad_norm": 0.8798456788063049, + "learning_rate": 9.619397662556433e-07, + "logits/chosen": -0.302534282207489, + "logits/rejected": -0.29954588413238525, + "logps/chosen": -0.719552755355835, + "logps/rejected": -0.7628123164176941, + "loss": 1.3699, + "rewards/accuracies": 0.5390625, + "rewards/chosen": -1.43910551071167, + "rewards/margins": 0.08651915192604065, + "rewards/rejected": -1.5256246328353882, + "step": 101 + }, + { + "epoch": 0.21565906838453916, + "grad_norm": 0.8746365308761597, + "learning_rate": 9.605093686700353e-07, + "logits/chosen": -0.372263640165329, + "logits/rejected": -0.3714321255683899, + "logps/chosen": -0.6665956974029541, + "logps/rejected": -0.7361368536949158, + "loss": 1.3173, + "rewards/accuracies": 0.5546875, + "rewards/chosen": -1.3331913948059082, + "rewards/margins": 0.1390824019908905, + "rewards/rejected": -1.4722737073898315, + "step": 102 + }, + { + "epoch": 0.21777337297654442, + "grad_norm": 0.8450930714607239, + "learning_rate": 9.590536894867812e-07, + "logits/chosen": -0.37228280305862427, + "logits/rejected": -0.37763556838035583, + "logps/chosen": -0.7425979375839233, + "logps/rejected": -0.7557005882263184, + "loss": 1.4085, + "rewards/accuracies": 0.5390625, + "rewards/chosen": -1.4851958751678467, + "rewards/margins": 0.026205357164144516, + "rewards/rejected": -1.5114011764526367, + "step": 103 + }, + { + "epoch": 0.2198876775685497, + "grad_norm": 0.5075035691261292, + "learning_rate": 9.575728086215091e-07, + "logits/chosen": -0.4433964788913727, + "logits/rejected": -0.3782787024974823, + "logps/chosen": -0.7308244109153748, + "logps/rejected": -0.8043883442878723, + "loss": 1.3353, + "rewards/accuracies": 0.5703125, + "rewards/chosen": -1.4616488218307495, + "rewards/margins": 0.14712783694267273, + "rewards/rejected": -1.6087766885757446, + "step": 104 + }, + { + "epoch": 0.222001982160555, + "grad_norm": 1.0270946025848389, + "learning_rate": 9.560668073733993e-07, + "logits/chosen": -0.3593980073928833, + "logits/rejected": -0.3159312903881073, + "logps/chosen": -0.757469892501831, + "logps/rejected": -0.8256179094314575, + "loss": 1.3289, + "rewards/accuracies": 0.5546875, + "rewards/chosen": -1.514939785003662, + "rewards/margins": 0.13629598915576935, + "rewards/rejected": -1.651235818862915, + "step": 105 + }, + { + "epoch": 0.2241162867525603, + "grad_norm": 0.8390078544616699, + "learning_rate": 9.54535768420721e-07, + "logits/chosen": -0.3266332149505615, + "logits/rejected": -0.3008713722229004, + "logps/chosen": -0.7286102771759033, + "logps/rejected": -0.7803273797035217, + "loss": 1.3593, + "rewards/accuracies": 0.5078125, + "rewards/chosen": -1.4572205543518066, + "rewards/margins": 0.10343428701162338, + "rewards/rejected": -1.5606547594070435, + "step": 106 + }, + { + "epoch": 0.22623059134456558, + "grad_norm": 1.130595088005066, + "learning_rate": 9.529797758162934e-07, + "logits/chosen": -0.36109817028045654, + "logits/rejected": -0.34797021746635437, + "logps/chosen": -0.7723361253738403, + "logps/rejected": -0.8873662352561951, + "loss": 1.2956, + "rewards/accuracies": 0.625, + "rewards/chosen": -1.5446722507476807, + "rewards/margins": 0.23006024956703186, + "rewards/rejected": -1.7747324705123901, + "step": 107 + }, + { + "epoch": 0.22834489593657087, + "grad_norm": 0.9399718642234802, + "learning_rate": 9.513989149828717e-07, + "logits/chosen": -0.3596777021884918, + "logits/rejected": -0.3660539388656616, + "logps/chosen": -0.7130635976791382, + "logps/rejected": -0.7378955483436584, + "loss": 1.3774, + "rewards/accuracies": 0.4765625, + "rewards/chosen": -1.4261271953582764, + "rewards/margins": 0.04966379329562187, + "rewards/rejected": -1.475791096687317, + "step": 108 + }, + { + "epoch": 0.23045920052857616, + "grad_norm": 1.097594976425171, + "learning_rate": 9.49793272708457e-07, + "logits/chosen": -0.31783169507980347, + "logits/rejected": -0.3008044362068176, + "logps/chosen": -0.6933202147483826, + "logps/rejected": -0.7510000467300415, + "loss": 1.3498, + "rewards/accuracies": 0.5390625, + "rewards/chosen": -1.3866404294967651, + "rewards/margins": 0.1153596043586731, + "rewards/rejected": -1.502000093460083, + "step": 109 + }, + { + "epoch": 0.23257350512058145, + "grad_norm": 1.096330165863037, + "learning_rate": 9.481629371415313e-07, + "logits/chosen": -0.3582899570465088, + "logits/rejected": -0.3120020031929016, + "logps/chosen": -0.817268431186676, + "logps/rejected": -0.8862374424934387, + "loss": 1.3349, + "rewards/accuracies": 0.5625, + "rewards/chosen": -1.634536862373352, + "rewards/margins": 0.13793781399726868, + "rewards/rejected": -1.7724748849868774, + "step": 110 + }, + { + "epoch": 0.2346878097125867, + "grad_norm": 0.8261978626251221, + "learning_rate": 9.465079977862192e-07, + "logits/chosen": -0.41336673498153687, + "logits/rejected": -0.39544352889060974, + "logps/chosen": -0.7673372030258179, + "logps/rejected": -0.8331737518310547, + "loss": 1.3373, + "rewards/accuracies": 0.59375, + "rewards/chosen": -1.5346744060516357, + "rewards/margins": 0.1316729635000229, + "rewards/rejected": -1.6663475036621094, + "step": 111 + }, + { + "epoch": 0.236802114304592, + "grad_norm": 0.5922806262969971, + "learning_rate": 9.448285454973737e-07, + "logits/chosen": -0.3224758207798004, + "logits/rejected": -0.3118049204349518, + "logps/chosen": -0.7584627866744995, + "logps/rejected": -0.8859898447990417, + "loss": 1.2731, + "rewards/accuracies": 0.625, + "rewards/chosen": -1.516925573348999, + "rewards/margins": 0.2550540566444397, + "rewards/rejected": -1.7719796895980835, + "step": 112 + }, + { + "epoch": 0.2389164188965973, + "grad_norm": 1.3172541856765747, + "learning_rate": 9.431246724755877e-07, + "logits/chosen": -0.4287208318710327, + "logits/rejected": -0.3984590172767639, + "logps/chosen": -0.7587048411369324, + "logps/rejected": -0.7860502004623413, + "loss": 1.3832, + "rewards/accuracies": 0.4765625, + "rewards/chosen": -1.5174096822738647, + "rewards/margins": 0.05469079315662384, + "rewards/rejected": -1.5721004009246826, + "step": 113 + }, + { + "epoch": 0.24103072348860258, + "grad_norm": 0.7749882340431213, + "learning_rate": 9.413964722621337e-07, + "logits/chosen": -0.39085906744003296, + "logits/rejected": -0.3316206932067871, + "logps/chosen": -0.7035898566246033, + "logps/rejected": -0.7375759482383728, + "loss": 1.387, + "rewards/accuracies": 0.5546875, + "rewards/chosen": -1.4071797132492065, + "rewards/margins": 0.06797221302986145, + "rewards/rejected": -1.4751518964767456, + "step": 114 + }, + { + "epoch": 0.24314502808060787, + "grad_norm": 1.0914056301116943, + "learning_rate": 9.396440397338272e-07, + "logits/chosen": -0.38826486468315125, + "logits/rejected": -0.35520774126052856, + "logps/chosen": -0.7385872602462769, + "logps/rejected": -0.7974889278411865, + "loss": 1.3477, + "rewards/accuracies": 0.5, + "rewards/chosen": -1.4771745204925537, + "rewards/margins": 0.11780343949794769, + "rewards/rejected": -1.594977855682373, + "step": 115 + }, + { + "epoch": 0.24525933267261316, + "grad_norm": 1.2966018915176392, + "learning_rate": 9.378674710978183e-07, + "logits/chosen": -0.36493802070617676, + "logits/rejected": -0.34763696789741516, + "logps/chosen": -0.6731826663017273, + "logps/rejected": -0.7645149827003479, + "loss": 1.3, + "rewards/accuracies": 0.5859375, + "rewards/chosen": -1.3463653326034546, + "rewards/margins": 0.18266455829143524, + "rewards/rejected": -1.5290299654006958, + "step": 116 + }, + { + "epoch": 0.24737363726461845, + "grad_norm": 0.49401605129241943, + "learning_rate": 9.360668638863109e-07, + "logits/chosen": -0.40416795015335083, + "logits/rejected": -0.3815993070602417, + "logps/chosen": -0.719497799873352, + "logps/rejected": -0.7588324546813965, + "loss": 1.3621, + "rewards/accuracies": 0.5625, + "rewards/chosen": -1.438995599746704, + "rewards/margins": 0.07866920530796051, + "rewards/rejected": -1.517664909362793, + "step": 117 + }, + { + "epoch": 0.2494879418566237, + "grad_norm": 1.0603238344192505, + "learning_rate": 9.342423169512071e-07, + "logits/chosen": -0.3857055604457855, + "logits/rejected": -0.3524513244628906, + "logps/chosen": -0.7373769283294678, + "logps/rejected": -0.7971038818359375, + "loss": 1.3358, + "rewards/accuracies": 0.59375, + "rewards/chosen": -1.4747538566589355, + "rewards/margins": 0.11945393681526184, + "rewards/rejected": -1.594207763671875, + "step": 118 + }, + { + "epoch": 0.251602246448629, + "grad_norm": 0.9880490303039551, + "learning_rate": 9.323939304586804e-07, + "logits/chosen": -0.31455785036087036, + "logits/rejected": -0.3102484941482544, + "logps/chosen": -0.7276102900505066, + "logps/rejected": -0.7446941137313843, + "loss": 1.3928, + "rewards/accuracies": 0.546875, + "rewards/chosen": -1.4552205801010132, + "rewards/margins": 0.034167706966400146, + "rewards/rejected": -1.4893882274627686, + "step": 119 + }, + { + "epoch": 0.2537165510406343, + "grad_norm": 1.0419566631317139, + "learning_rate": 9.305218058836776e-07, + "logits/chosen": -0.38093918561935425, + "logits/rejected": -0.3588898181915283, + "logps/chosen": -0.715582013130188, + "logps/rejected": -0.8271002769470215, + "loss": 1.2934, + "rewards/accuracies": 0.6015625, + "rewards/chosen": -1.431164026260376, + "rewards/margins": 0.22303667664527893, + "rewards/rejected": -1.654200553894043, + "step": 120 + }, + { + "epoch": 0.2558308556326396, + "grad_norm": 0.657620370388031, + "learning_rate": 9.286260460043473e-07, + "logits/chosen": -0.45690783858299255, + "logits/rejected": -0.4082674980163574, + "logps/chosen": -0.6932571530342102, + "logps/rejected": -0.7631082534790039, + "loss": 1.3398, + "rewards/accuracies": 0.5546875, + "rewards/chosen": -1.3865143060684204, + "rewards/margins": 0.13970226049423218, + "rewards/rejected": -1.5262165069580078, + "step": 121 + }, + { + "epoch": 0.25794516022464486, + "grad_norm": 0.983686089515686, + "learning_rate": 9.267067548963974e-07, + "logits/chosen": -0.40266987681388855, + "logits/rejected": -0.37586671113967896, + "logps/chosen": -0.7362720966339111, + "logps/rejected": -0.7538987398147583, + "loss": 1.4066, + "rewards/accuracies": 0.5390625, + "rewards/chosen": -1.4725441932678223, + "rewards/margins": 0.03525342047214508, + "rewards/rejected": -1.5077974796295166, + "step": 122 + }, + { + "epoch": 0.26005946481665015, + "grad_norm": 1.0076361894607544, + "learning_rate": 9.24764037927381e-07, + "logits/chosen": -0.4461461007595062, + "logits/rejected": -0.40700826048851013, + "logps/chosen": -0.7206646800041199, + "logps/rejected": -0.7489192485809326, + "loss": 1.3759, + "rewards/accuracies": 0.5625, + "rewards/chosen": -1.4413293600082397, + "rewards/margins": 0.05650928616523743, + "rewards/rejected": -1.4978384971618652, + "step": 123 + }, + { + "epoch": 0.26217376940865544, + "grad_norm": 0.933315098285675, + "learning_rate": 9.22798001750913e-07, + "logits/chosen": -0.3966676890850067, + "logits/rejected": -0.3572196960449219, + "logps/chosen": -0.7075096368789673, + "logps/rejected": -0.7406759262084961, + "loss": 1.3667, + "rewards/accuracies": 0.6171875, + "rewards/chosen": -1.4150192737579346, + "rewards/margins": 0.06633266806602478, + "rewards/rejected": -1.4813518524169922, + "step": 124 + }, + { + "epoch": 0.26428807400066073, + "grad_norm": 0.6277392506599426, + "learning_rate": 9.20808754300814e-07, + "logits/chosen": -0.3555490970611572, + "logits/rejected": -0.35786163806915283, + "logps/chosen": -0.7549921274185181, + "logps/rejected": -0.832869291305542, + "loss": 1.3175, + "rewards/accuracies": 0.5546875, + "rewards/chosen": -1.5099842548370361, + "rewards/margins": 0.15575438737869263, + "rewards/rejected": -1.665738582611084, + "step": 125 + }, + { + "epoch": 0.266402378592666, + "grad_norm": 0.7172744274139404, + "learning_rate": 9.18796404785185e-07, + "logits/chosen": -0.41230690479278564, + "logits/rejected": -0.39935630559921265, + "logps/chosen": -0.7129833698272705, + "logps/rejected": -0.7888559103012085, + "loss": 1.3167, + "rewards/accuracies": 0.5625, + "rewards/chosen": -1.425966739654541, + "rewards/margins": 0.15174514055252075, + "rewards/rejected": -1.577711820602417, + "step": 126 + }, + { + "epoch": 0.2685166831846713, + "grad_norm": 1.2629508972167969, + "learning_rate": 9.16761063680412e-07, + "logits/chosen": -0.36754000186920166, + "logits/rejected": -0.3541562259197235, + "logps/chosen": -0.6992133855819702, + "logps/rejected": -0.7668892741203308, + "loss": 1.3735, + "rewards/accuracies": 0.578125, + "rewards/chosen": -1.3984267711639404, + "rewards/margins": 0.13535188138484955, + "rewards/rejected": -1.5337785482406616, + "step": 127 + }, + { + "epoch": 0.2706309877766766, + "grad_norm": 0.7024405598640442, + "learning_rate": 9.147028427251009e-07, + "logits/chosen": -0.4014585018157959, + "logits/rejected": -0.40560898184776306, + "logps/chosen": -0.727234959602356, + "logps/rejected": -0.8070081472396851, + "loss": 1.3138, + "rewards/accuracies": 0.625, + "rewards/chosen": -1.454469919204712, + "rewards/margins": 0.15954652428627014, + "rewards/rejected": -1.6140162944793701, + "step": 128 + }, + { + "epoch": 0.2727452923686819, + "grad_norm": 1.6173532009124756, + "learning_rate": 9.126218549139433e-07, + "logits/chosen": -0.32572367787361145, + "logits/rejected": -0.3470613956451416, + "logps/chosen": -0.7555541396141052, + "logps/rejected": -0.8856738209724426, + "loss": 1.2461, + "rewards/accuracies": 0.6796875, + "rewards/chosen": -1.5111082792282104, + "rewards/margins": 0.26023951172828674, + "rewards/rejected": -1.7713476419448853, + "step": 129 + }, + { + "epoch": 0.2748595969606871, + "grad_norm": 0.5878487229347229, + "learning_rate": 9.105182144915129e-07, + "logits/chosen": -0.39267170429229736, + "logits/rejected": -0.3448992967605591, + "logps/chosen": -0.6776289343833923, + "logps/rejected": -0.7530183792114258, + "loss": 1.3242, + "rewards/accuracies": 0.609375, + "rewards/chosen": -1.3552578687667847, + "rewards/margins": 0.15077897906303406, + "rewards/rejected": -1.5060367584228516, + "step": 130 + }, + { + "epoch": 0.2769739015526924, + "grad_norm": 0.43264809250831604, + "learning_rate": 9.08392036945994e-07, + "logits/chosen": -0.39980950951576233, + "logits/rejected": -0.4247930645942688, + "logps/chosen": -0.7898982167243958, + "logps/rejected": -0.8856299519538879, + "loss": 1.3004, + "rewards/accuracies": 0.59375, + "rewards/chosen": -1.5797964334487915, + "rewards/margins": 0.19146347045898438, + "rewards/rejected": -1.7712599039077759, + "step": 131 + }, + { + "epoch": 0.2790882061446977, + "grad_norm": 1.0348538160324097, + "learning_rate": 9.062434390028407e-07, + "logits/chosen": -0.35729700326919556, + "logits/rejected": -0.3265542984008789, + "logps/chosen": -0.7120587229728699, + "logps/rejected": -0.771691083908081, + "loss": 1.3374, + "rewards/accuracies": 0.59375, + "rewards/chosen": -1.4241174459457397, + "rewards/margins": 0.11926469206809998, + "rewards/rejected": -1.543382167816162, + "step": 132 + }, + { + "epoch": 0.281202510736703, + "grad_norm": 2.0902225971221924, + "learning_rate": 9.04072538618369e-07, + "logits/chosen": -0.4942469298839569, + "logits/rejected": -0.48699846863746643, + "logps/chosen": -0.7882512211799622, + "logps/rejected": -0.8270165920257568, + "loss": 1.3715, + "rewards/accuracies": 0.5703125, + "rewards/chosen": -1.5765024423599243, + "rewards/margins": 0.07753071188926697, + "rewards/rejected": -1.6540331840515137, + "step": 133 + }, + { + "epoch": 0.2833168153287083, + "grad_norm": 1.6436113119125366, + "learning_rate": 9.018794549732817e-07, + "logits/chosen": -0.41133156418800354, + "logits/rejected": -0.4146718382835388, + "logps/chosen": -0.779824435710907, + "logps/rejected": -0.9421006441116333, + "loss": 1.2521, + "rewards/accuracies": 0.625, + "rewards/chosen": -1.559648871421814, + "rewards/margins": 0.324552446603775, + "rewards/rejected": -1.8842012882232666, + "step": 134 + }, + { + "epoch": 0.28543111992071357, + "grad_norm": 0.8831859827041626, + "learning_rate": 8.996643084661244e-07, + "logits/chosen": -0.42452165484428406, + "logits/rejected": -0.3798604905605316, + "logps/chosen": -0.6499216556549072, + "logps/rejected": -0.7796702980995178, + "loss": 1.2581, + "rewards/accuracies": 0.625, + "rewards/chosen": -1.2998433113098145, + "rewards/margins": 0.25949734449386597, + "rewards/rejected": -1.5593405961990356, + "step": 135 + }, + { + "epoch": 0.28754542451271886, + "grad_norm": 0.8031218647956848, + "learning_rate": 8.974272207066767e-07, + "logits/chosen": -0.38131940364837646, + "logits/rejected": -0.3854255676269531, + "logps/chosen": -0.7026851773262024, + "logps/rejected": -0.762391209602356, + "loss": 1.3333, + "rewards/accuracies": 0.5625, + "rewards/chosen": -1.4053703546524048, + "rewards/margins": 0.11941206455230713, + "rewards/rejected": -1.524782419204712, + "step": 136 + }, + { + "epoch": 0.28965972910472415, + "grad_norm": 1.4455821514129639, + "learning_rate": 8.951683145092748e-07, + "logits/chosen": -0.42824965715408325, + "logits/rejected": -0.4320424795150757, + "logps/chosen": -0.7893270254135132, + "logps/rejected": -0.8517144322395325, + "loss": 1.3652, + "rewards/accuracies": 0.5234375, + "rewards/chosen": -1.5786540508270264, + "rewards/margins": 0.12477481365203857, + "rewards/rejected": -1.703428864479065, + "step": 137 + }, + { + "epoch": 0.29177403369672944, + "grad_norm": 0.6299450397491455, + "learning_rate": 8.928877138860706e-07, + "logits/chosen": -0.4388589560985565, + "logits/rejected": -0.40156903862953186, + "logps/chosen": -0.7346572875976562, + "logps/rejected": -0.8166492581367493, + "loss": 1.3134, + "rewards/accuracies": 0.5703125, + "rewards/chosen": -1.4693145751953125, + "rewards/margins": 0.16398391127586365, + "rewards/rejected": -1.6332985162734985, + "step": 138 + }, + { + "epoch": 0.29388833828873473, + "grad_norm": 2.784437417984009, + "learning_rate": 8.905855440402224e-07, + "logits/chosen": -0.405662477016449, + "logits/rejected": -0.35549795627593994, + "logps/chosen": -0.7482771277427673, + "logps/rejected": -0.795568585395813, + "loss": 1.3656, + "rewards/accuracies": 0.578125, + "rewards/chosen": -1.4965542554855347, + "rewards/margins": 0.09458285570144653, + "rewards/rejected": -1.591137170791626, + "step": 139 + }, + { + "epoch": 0.29600264288074, + "grad_norm": 0.4958692193031311, + "learning_rate": 8.882619313590212e-07, + "logits/chosen": -0.3814452886581421, + "logits/rejected": -0.35715553164482117, + "logps/chosen": -0.7731542587280273, + "logps/rejected": -0.8285202980041504, + "loss": 1.3776, + "rewards/accuracies": 0.515625, + "rewards/chosen": -1.5463085174560547, + "rewards/margins": 0.11073210835456848, + "rewards/rejected": -1.6570405960083008, + "step": 140 + }, + { + "epoch": 0.2981169474727453, + "grad_norm": 0.4597362279891968, + "learning_rate": 8.859170034069532e-07, + "logits/chosen": -0.388383150100708, + "logits/rejected": -0.4071737229824066, + "logps/chosen": -0.7263504266738892, + "logps/rejected": -0.769676148891449, + "loss": 1.3712, + "rewards/accuracies": 0.53125, + "rewards/chosen": -1.4527008533477783, + "rewards/margins": 0.08665145933628082, + "rewards/rejected": -1.539352297782898, + "step": 141 + }, + { + "epoch": 0.3002312520647506, + "grad_norm": 0.4914930760860443, + "learning_rate": 8.835508889186956e-07, + "logits/chosen": -0.41084378957748413, + "logits/rejected": -0.3823031187057495, + "logps/chosen": -0.7565821409225464, + "logps/rejected": -0.9084322452545166, + "loss": 1.2717, + "rewards/accuracies": 0.578125, + "rewards/chosen": -1.5131642818450928, + "rewards/margins": 0.3037002384662628, + "rewards/rejected": -1.8168644905090332, + "step": 142 + }, + { + "epoch": 0.3023455566567559, + "grad_norm": 2.0075581073760986, + "learning_rate": 8.811637177920499e-07, + "logits/chosen": -0.4438302516937256, + "logits/rejected": -0.4916025698184967, + "logps/chosen": -0.800719141960144, + "logps/rejected": -0.8658267855644226, + "loss": 1.358, + "rewards/accuracies": 0.515625, + "rewards/chosen": -1.601438283920288, + "rewards/margins": 0.1302153617143631, + "rewards/rejected": -1.7316535711288452, + "step": 143 + }, + { + "epoch": 0.3044598612487612, + "grad_norm": 1.1243022680282593, + "learning_rate": 8.7875562108081e-07, + "logits/chosen": -0.40519949793815613, + "logits/rejected": -0.3905750811100006, + "logps/chosen": -0.689585268497467, + "logps/rejected": -0.7312421798706055, + "loss": 1.3503, + "rewards/accuracies": 0.5703125, + "rewards/chosen": -1.379170536994934, + "rewards/margins": 0.08331384509801865, + "rewards/rejected": -1.462484359741211, + "step": 144 + }, + { + "epoch": 0.3065741658407664, + "grad_norm": 0.7543137669563293, + "learning_rate": 8.76326730987568e-07, + "logits/chosen": -0.4696752727031708, + "logits/rejected": -0.4357326626777649, + "logps/chosen": -0.7813425660133362, + "logps/rejected": -0.8276973962783813, + "loss": 1.3794, + "rewards/accuracies": 0.5234375, + "rewards/chosen": -1.5626851320266724, + "rewards/margins": 0.09270970523357391, + "rewards/rejected": -1.6553947925567627, + "step": 145 + }, + { + "epoch": 0.3086884704327717, + "grad_norm": 1.3136053085327148, + "learning_rate": 8.738771808564555e-07, + "logits/chosen": -0.4262731075286865, + "logits/rejected": -0.44038820266723633, + "logps/chosen": -0.697494387626648, + "logps/rejected": -0.8369535803794861, + "loss": 1.2699, + "rewards/accuracies": 0.609375, + "rewards/chosen": -1.394988775253296, + "rewards/margins": 0.2789183557033539, + "rewards/rejected": -1.6739071607589722, + "step": 146 + }, + { + "epoch": 0.310802775024777, + "grad_norm": 2.221562385559082, + "learning_rate": 8.714071051658245e-07, + "logits/chosen": -0.40089336037635803, + "logits/rejected": -0.37991875410079956, + "logps/chosen": -0.7704445123672485, + "logps/rejected": -0.859091579914093, + "loss": 1.2987, + "rewards/accuracies": 0.5859375, + "rewards/chosen": -1.540889024734497, + "rewards/margins": 0.17729414999485016, + "rewards/rejected": -1.718183159828186, + "step": 147 + }, + { + "epoch": 0.3129170796167823, + "grad_norm": 1.5049912929534912, + "learning_rate": 8.689166395208636e-07, + "logits/chosen": -0.38984015583992004, + "logits/rejected": -0.35900723934173584, + "logps/chosen": -0.6424779891967773, + "logps/rejected": -0.7145389318466187, + "loss": 1.3261, + "rewards/accuracies": 0.5703125, + "rewards/chosen": -1.2849559783935547, + "rewards/margins": 0.14412200450897217, + "rewards/rejected": -1.4290778636932373, + "step": 148 + }, + { + "epoch": 0.31503138420878757, + "grad_norm": 0.36125388741493225, + "learning_rate": 8.664059206461534e-07, + "logits/chosen": -0.3490441143512726, + "logits/rejected": -0.3219914436340332, + "logps/chosen": -0.7200264930725098, + "logps/rejected": -0.7924249768257141, + "loss": 1.3476, + "rewards/accuracies": 0.5625, + "rewards/chosen": -1.4400529861450195, + "rewards/margins": 0.1447969526052475, + "rewards/rejected": -1.5848499536514282, + "step": 149 + }, + { + "epoch": 0.31714568880079286, + "grad_norm": 1.039840579032898, + "learning_rate": 8.638750863781612e-07, + "logits/chosen": -0.40701645612716675, + "logits/rejected": -0.406186580657959, + "logps/chosen": -0.7083575129508972, + "logps/rejected": -0.7766748070716858, + "loss": 1.3263, + "rewards/accuracies": 0.5390625, + "rewards/chosen": -1.4167150259017944, + "rewards/margins": 0.1366347074508667, + "rewards/rejected": -1.5533496141433716, + "step": 150 + }, + { + "epoch": 0.31925999339279815, + "grad_norm": 0.7128564119338989, + "learning_rate": 8.613242756576728e-07, + "logits/chosen": -0.40932926535606384, + "logits/rejected": -0.4234562814235687, + "logps/chosen": -0.6775843501091003, + "logps/rejected": -0.7866222858428955, + "loss": 1.2834, + "rewards/accuracies": 0.6015625, + "rewards/chosen": -1.3551687002182007, + "rewards/margins": 0.2180757373571396, + "rewards/rejected": -1.573244571685791, + "step": 151 + }, + { + "epoch": 0.32137429798480344, + "grad_norm": 1.1701059341430664, + "learning_rate": 8.587536285221655e-07, + "logits/chosen": -0.3654797077178955, + "logits/rejected": -0.3181680738925934, + "logps/chosen": -0.6686022877693176, + "logps/rejected": -0.7058504223823547, + "loss": 1.3612, + "rewards/accuracies": 0.546875, + "rewards/chosen": -1.3372045755386353, + "rewards/margins": 0.07449636608362198, + "rewards/rejected": -1.4117008447647095, + "step": 152 + }, + { + "epoch": 0.3234886025768087, + "grad_norm": 0.8239700794219971, + "learning_rate": 8.561632860981204e-07, + "logits/chosen": -0.42527130246162415, + "logits/rejected": -0.4091627299785614, + "logps/chosen": -0.6969794631004333, + "logps/rejected": -0.8019355535507202, + "loss": 1.2974, + "rewards/accuracies": 0.578125, + "rewards/chosen": -1.3939589262008667, + "rewards/margins": 0.20991206169128418, + "rewards/rejected": -1.6038711071014404, + "step": 153 + }, + { + "epoch": 0.325602907168814, + "grad_norm": 1.4885636568069458, + "learning_rate": 8.535533905932737e-07, + "logits/chosen": -0.4126192331314087, + "logits/rejected": -0.41548141837120056, + "logps/chosen": -0.7076549530029297, + "logps/rejected": -0.7940821051597595, + "loss": 1.3198, + "rewards/accuracies": 0.625, + "rewards/chosen": -1.4153099060058594, + "rewards/margins": 0.17285437881946564, + "rewards/rejected": -1.588164210319519, + "step": 154 + }, + { + "epoch": 0.3277172117608193, + "grad_norm": 1.439434289932251, + "learning_rate": 8.509240852888106e-07, + "logits/chosen": -0.3763914704322815, + "logits/rejected": -0.3617165684700012, + "logps/chosen": -0.7189474105834961, + "logps/rejected": -0.827629804611206, + "loss": 1.2816, + "rewards/accuracies": 0.625, + "rewards/chosen": -1.4378948211669922, + "rewards/margins": 0.2173648476600647, + "rewards/rejected": -1.655259609222412, + "step": 155 + }, + { + "epoch": 0.3298315163528246, + "grad_norm": 1.4505418539047241, + "learning_rate": 8.482755145314985e-07, + "logits/chosen": -0.37879478931427, + "logits/rejected": -0.38689684867858887, + "logps/chosen": -0.7011865973472595, + "logps/rejected": -0.8019431829452515, + "loss": 1.3158, + "rewards/accuracies": 0.5546875, + "rewards/chosen": -1.402373194694519, + "rewards/margins": 0.2015131413936615, + "rewards/rejected": -1.603886365890503, + "step": 156 + }, + { + "epoch": 0.3319458209448299, + "grad_norm": 2.0968713760375977, + "learning_rate": 8.45607823725763e-07, + "logits/chosen": -0.4366365075111389, + "logits/rejected": -0.41210681200027466, + "logps/chosen": -0.6455651521682739, + "logps/rejected": -0.7228428721427917, + "loss": 1.3247, + "rewards/accuracies": 0.5546875, + "rewards/chosen": -1.2911303043365479, + "rewards/margins": 0.1545555144548416, + "rewards/rejected": -1.4456857442855835, + "step": 157 + }, + { + "epoch": 0.3340601255368352, + "grad_norm": 0.6716106534004211, + "learning_rate": 8.429211593257052e-07, + "logits/chosen": -0.42992207407951355, + "logits/rejected": -0.4105672836303711, + "logps/chosen": -0.6981461048126221, + "logps/rejected": -0.7909567952156067, + "loss": 1.3128, + "rewards/accuracies": 0.5859375, + "rewards/chosen": -1.3962922096252441, + "rewards/margins": 0.1856214702129364, + "rewards/rejected": -1.5819135904312134, + "step": 158 + }, + { + "epoch": 0.33617443012884046, + "grad_norm": 2.4430501461029053, + "learning_rate": 8.402156688270612e-07, + "logits/chosen": -0.4184916317462921, + "logits/rejected": -0.3943992257118225, + "logps/chosen": -0.6568948030471802, + "logps/rejected": -0.7506390810012817, + "loss": 1.2992, + "rewards/accuracies": 0.5859375, + "rewards/chosen": -1.3137896060943604, + "rewards/margins": 0.18748846650123596, + "rewards/rejected": -1.5012781620025635, + "step": 159 + }, + { + "epoch": 0.3382887347208457, + "grad_norm": 2.0322091579437256, + "learning_rate": 8.374915007591052e-07, + "logits/chosen": -0.4713057577610016, + "logits/rejected": -0.42163771390914917, + "logps/chosen": -0.7347853779792786, + "logps/rejected": -0.7770044207572937, + "loss": 1.3801, + "rewards/accuracies": 0.5078125, + "rewards/chosen": -1.4695707559585571, + "rewards/margins": 0.0844380110502243, + "rewards/rejected": -1.5540088415145874, + "step": 160 + }, + { + "epoch": 0.340403039312851, + "grad_norm": 0.4045500159263611, + "learning_rate": 8.347488046764948e-07, + "logits/chosen": -0.39465126395225525, + "logits/rejected": -0.3961923122406006, + "logps/chosen": -0.601732075214386, + "logps/rejected": -0.694148600101471, + "loss": 1.2859, + "rewards/accuracies": 0.6328125, + "rewards/chosen": -1.203464150428772, + "rewards/margins": 0.18483319878578186, + "rewards/rejected": -1.388297200202942, + "step": 161 + }, + { + "epoch": 0.3425173439048563, + "grad_norm": 2.79396915435791, + "learning_rate": 8.319877311510612e-07, + "logits/chosen": -0.4311378002166748, + "logits/rejected": -0.4248836636543274, + "logps/chosen": -0.6813413500785828, + "logps/rejected": -0.775830864906311, + "loss": 1.3001, + "rewards/accuracies": 0.578125, + "rewards/chosen": -1.3626827001571655, + "rewards/margins": 0.18897925317287445, + "rewards/rejected": -1.551661729812622, + "step": 162 + }, + { + "epoch": 0.34463164849686156, + "grad_norm": 0.714146077632904, + "learning_rate": 8.292084317635419e-07, + "logits/chosen": -0.4060715436935425, + "logits/rejected": -0.3770482540130615, + "logps/chosen": -0.7176523208618164, + "logps/rejected": -0.7973593473434448, + "loss": 1.324, + "rewards/accuracies": 0.5859375, + "rewards/chosen": -1.4353046417236328, + "rewards/margins": 0.15941408276557922, + "rewards/rejected": -1.5947186946868896, + "step": 163 + }, + { + "epoch": 0.34674595308886685, + "grad_norm": 1.6007037162780762, + "learning_rate": 8.264110590952607e-07, + "logits/chosen": -0.49063974618911743, + "logits/rejected": -0.5119628310203552, + "logps/chosen": -0.7263911366462708, + "logps/rejected": -0.9138184785842896, + "loss": 1.2439, + "rewards/accuracies": 0.6484375, + "rewards/chosen": -1.4527822732925415, + "rewards/margins": 0.3748546540737152, + "rewards/rejected": -1.827636957168579, + "step": 164 + }, + { + "epoch": 0.34886025768087214, + "grad_norm": 1.4566830396652222, + "learning_rate": 8.235957667197494e-07, + "logits/chosen": -0.4681779146194458, + "logits/rejected": -0.46475380659103394, + "logps/chosen": -0.6923782229423523, + "logps/rejected": -0.7901281118392944, + "loss": 1.295, + "rewards/accuracies": 0.5703125, + "rewards/chosen": -1.3847564458847046, + "rewards/margins": 0.19549959897994995, + "rewards/rejected": -1.5802562236785889, + "step": 165 + }, + { + "epoch": 0.35097456227287743, + "grad_norm": 3.0825328826904297, + "learning_rate": 8.207627091943177e-07, + "logits/chosen": -0.4294862151145935, + "logits/rejected": -0.42411237955093384, + "logps/chosen": -0.6851246356964111, + "logps/rejected": -0.7844961881637573, + "loss": 1.2871, + "rewards/accuracies": 0.625, + "rewards/chosen": -1.3702492713928223, + "rewards/margins": 0.19874317944049835, + "rewards/rejected": -1.5689923763275146, + "step": 166 + }, + { + "epoch": 0.3530888668648827, + "grad_norm": 1.0783339738845825, + "learning_rate": 8.179120420515675e-07, + "logits/chosen": -0.4528030455112457, + "logits/rejected": -0.4626815617084503, + "logps/chosen": -0.703376293182373, + "logps/rejected": -0.8752757906913757, + "loss": 1.2193, + "rewards/accuracies": 0.703125, + "rewards/chosen": -1.406752586364746, + "rewards/margins": 0.34379899501800537, + "rewards/rejected": -1.7505515813827515, + "step": 167 + }, + { + "epoch": 0.355203171456888, + "grad_norm": 2.6788036823272705, + "learning_rate": 8.150439217908556e-07, + "logits/chosen": -0.44946759939193726, + "logits/rejected": -0.47430264949798584, + "logps/chosen": -0.751136839389801, + "logps/rejected": -0.874577522277832, + "loss": 1.29, + "rewards/accuracies": 0.578125, + "rewards/chosen": -1.502273678779602, + "rewards/margins": 0.24688144028186798, + "rewards/rejected": -1.749155044555664, + "step": 168 + }, + { + "epoch": 0.3573174760488933, + "grad_norm": 0.9087730050086975, + "learning_rate": 8.121585058696999e-07, + "logits/chosen": -0.47294262051582336, + "logits/rejected": -0.46765226125717163, + "logps/chosen": -0.7291173934936523, + "logps/rejected": -0.7999277114868164, + "loss": 1.3482, + "rewards/accuracies": 0.6015625, + "rewards/chosen": -1.4582347869873047, + "rewards/margins": 0.1416206806898117, + "rewards/rejected": -1.5998554229736328, + "step": 169 + }, + { + "epoch": 0.3594317806408986, + "grad_norm": 3.392674207687378, + "learning_rate": 8.092559526951374e-07, + "logits/chosen": -0.5026620626449585, + "logits/rejected": -0.46620574593544006, + "logps/chosen": -0.746992290019989, + "logps/rejected": -0.8266301155090332, + "loss": 1.3202, + "rewards/accuracies": 0.6328125, + "rewards/chosen": -1.493984580039978, + "rewards/margins": 0.15927578508853912, + "rewards/rejected": -1.6532602310180664, + "step": 170 + }, + { + "epoch": 0.3615460852329039, + "grad_norm": 1.27628755569458, + "learning_rate": 8.063364216150256e-07, + "logits/chosen": -0.5211395025253296, + "logits/rejected": -0.5419963598251343, + "logps/chosen": -0.7919114828109741, + "logps/rejected": -0.8731362223625183, + "loss": 1.3228, + "rewards/accuracies": 0.5703125, + "rewards/chosen": -1.5838229656219482, + "rewards/margins": 0.16244953870773315, + "rewards/rejected": -1.7462724447250366, + "step": 171 + }, + { + "epoch": 0.36366038982490917, + "grad_norm": 0.8269656896591187, + "learning_rate": 8.034000729092967e-07, + "logits/chosen": -0.49545183777809143, + "logits/rejected": -0.4716613292694092, + "logps/chosen": -0.719520092010498, + "logps/rejected": -0.7876347303390503, + "loss": 1.3367, + "rewards/accuracies": 0.5625, + "rewards/chosen": -1.439040184020996, + "rewards/margins": 0.13622930645942688, + "rewards/rejected": -1.5752694606781006, + "step": 172 + }, + { + "epoch": 0.36577469441691446, + "grad_norm": 0.6049383282661438, + "learning_rate": 8.004470677811559e-07, + "logits/chosen": -0.45276379585266113, + "logits/rejected": -0.42617955803871155, + "logps/chosen": -0.7097947597503662, + "logps/rejected": -0.7606989145278931, + "loss": 1.3909, + "rewards/accuracies": 0.5703125, + "rewards/chosen": -1.4195895195007324, + "rewards/margins": 0.10180822014808655, + "rewards/rejected": -1.5213978290557861, + "step": 173 + }, + { + "epoch": 0.36788899900891975, + "grad_norm": 3.980013847351074, + "learning_rate": 7.974775683482337e-07, + "logits/chosen": -0.4783569574356079, + "logits/rejected": -0.43521156907081604, + "logps/chosen": -0.7623491287231445, + "logps/rejected": -0.8719285130500793, + "loss": 1.2838, + "rewards/accuracies": 0.5859375, + "rewards/chosen": -1.524698257446289, + "rewards/margins": 0.2191585898399353, + "rewards/rejected": -1.7438570261001587, + "step": 174 + }, + { + "epoch": 0.370003303600925, + "grad_norm": 1.024530053138733, + "learning_rate": 7.94491737633684e-07, + "logits/chosen": -0.5009916424751282, + "logits/rejected": -0.48874592781066895, + "logps/chosen": -0.7552992701530457, + "logps/rejected": -0.8485872745513916, + "loss": 1.3153, + "rewards/accuracies": 0.6015625, + "rewards/chosen": -1.5105985403060913, + "rewards/margins": 0.18657605350017548, + "rewards/rejected": -1.6971745491027832, + "step": 175 + }, + { + "epoch": 0.37211760819293027, + "grad_norm": 1.5952919721603394, + "learning_rate": 7.91489739557236e-07, + "logits/chosen": -0.4424138069152832, + "logits/rejected": -0.4334307312965393, + "logps/chosen": -0.6956002116203308, + "logps/rejected": -0.8018803000450134, + "loss": 1.3011, + "rewards/accuracies": 0.59375, + "rewards/chosen": -1.3912004232406616, + "rewards/margins": 0.21256020665168762, + "rewards/rejected": -1.6037606000900269, + "step": 176 + }, + { + "epoch": 0.37423191278493556, + "grad_norm": 1.8331164121627808, + "learning_rate": 7.884717389261934e-07, + "logits/chosen": -0.4836267828941345, + "logits/rejected": -0.5018677115440369, + "logps/chosen": -0.7895969152450562, + "logps/rejected": -0.927432656288147, + "loss": 1.2467, + "rewards/accuracies": 0.625, + "rewards/chosen": -1.5791938304901123, + "rewards/margins": 0.27567166090011597, + "rewards/rejected": -1.854865312576294, + "step": 177 + }, + { + "epoch": 0.37634621737694085, + "grad_norm": 2.165984869003296, + "learning_rate": 7.854379014263876e-07, + "logits/chosen": -0.46125832200050354, + "logits/rejected": -0.39802712202072144, + "logps/chosen": -0.8382925391197205, + "logps/rejected": -0.9422982931137085, + "loss": 1.339, + "rewards/accuracies": 0.5546875, + "rewards/chosen": -1.676585078239441, + "rewards/margins": 0.20801125466823578, + "rewards/rejected": -1.884596586227417, + "step": 178 + }, + { + "epoch": 0.37846052196894614, + "grad_norm": 0.522197425365448, + "learning_rate": 7.823883936130817e-07, + "logits/chosen": -0.4747823476791382, + "logits/rejected": -0.4888593554496765, + "logps/chosen": -0.723059892654419, + "logps/rejected": -0.84626305103302, + "loss": 1.2708, + "rewards/accuracies": 0.6171875, + "rewards/chosen": -1.446119785308838, + "rewards/margins": 0.24640652537345886, + "rewards/rejected": -1.69252610206604, + "step": 179 + }, + { + "epoch": 0.38057482656095143, + "grad_norm": 1.9690748453140259, + "learning_rate": 7.793233829018262e-07, + "logits/chosen": -0.5430271625518799, + "logits/rejected": -0.5403288006782532, + "logps/chosen": -0.8244275450706482, + "logps/rejected": -0.9133931994438171, + "loss": 1.3306, + "rewards/accuracies": 0.5, + "rewards/chosen": -1.6488550901412964, + "rewards/margins": 0.17793115973472595, + "rewards/rejected": -1.8267863988876343, + "step": 180 + }, + { + "epoch": 0.3826891311529567, + "grad_norm": 2.9181363582611084, + "learning_rate": 7.762430375592688e-07, + "logits/chosen": -0.4843495786190033, + "logits/rejected": -0.47929176688194275, + "logps/chosen": -0.8097372055053711, + "logps/rejected": -0.8973760008811951, + "loss": 1.3283, + "rewards/accuracies": 0.5625, + "rewards/chosen": -1.6194744110107422, + "rewards/margins": 0.17527759075164795, + "rewards/rejected": -1.7947520017623901, + "step": 181 + }, + { + "epoch": 0.384803435744962, + "grad_norm": 4.227083683013916, + "learning_rate": 7.731475266939158e-07, + "logits/chosen": -0.5047686696052551, + "logits/rejected": -0.4921850264072418, + "logps/chosen": -0.875984787940979, + "logps/rejected": -1.0406755208969116, + "loss": 1.3169, + "rewards/accuracies": 0.5703125, + "rewards/chosen": -1.751969575881958, + "rewards/margins": 0.32938146591186523, + "rewards/rejected": -2.0813510417938232, + "step": 182 + }, + { + "epoch": 0.3869177403369673, + "grad_norm": 1.2871490716934204, + "learning_rate": 7.700370202468489e-07, + "logits/chosen": -0.5123783349990845, + "logits/rejected": -0.55179762840271, + "logps/chosen": -0.8869211077690125, + "logps/rejected": -1.1082773208618164, + "loss": 1.216, + "rewards/accuracies": 0.640625, + "rewards/chosen": -1.773842215538025, + "rewards/margins": 0.4427123963832855, + "rewards/rejected": -2.216554641723633, + "step": 183 + }, + { + "epoch": 0.3890320449289726, + "grad_norm": 1.3015679121017456, + "learning_rate": 7.669116889823954e-07, + "logits/chosen": -0.49182361364364624, + "logits/rejected": -0.5180585384368896, + "logps/chosen": -0.8816227912902832, + "logps/rejected": -0.9516821503639221, + "loss": 1.3449, + "rewards/accuracies": 0.5078125, + "rewards/chosen": -1.7632455825805664, + "rewards/margins": 0.14011862874031067, + "rewards/rejected": -1.9033643007278442, + "step": 184 + }, + { + "epoch": 0.3911463495209779, + "grad_norm": 4.280956268310547, + "learning_rate": 7.637717044787526e-07, + "logits/chosen": -0.5702117681503296, + "logits/rejected": -0.5475804209709167, + "logps/chosen": -0.9307697415351868, + "logps/rejected": -1.0322346687316895, + "loss": 1.3434, + "rewards/accuracies": 0.5546875, + "rewards/chosen": -1.8615394830703735, + "rewards/margins": 0.20292985439300537, + "rewards/rejected": -2.064469337463379, + "step": 185 + }, + { + "epoch": 0.39326065411298317, + "grad_norm": 1.3511455059051514, + "learning_rate": 7.606172391185699e-07, + "logits/chosen": -0.5466108322143555, + "logits/rejected": -0.551085352897644, + "logps/chosen": -1.0657893419265747, + "logps/rejected": -1.15786612033844, + "loss": 1.3549, + "rewards/accuracies": 0.4609375, + "rewards/chosen": -2.1315786838531494, + "rewards/margins": 0.18415334820747375, + "rewards/rejected": -2.31573224067688, + "step": 186 + }, + { + "epoch": 0.39537495870498846, + "grad_norm": 0.7001176476478577, + "learning_rate": 7.574484660794836e-07, + "logits/chosen": -0.4849010407924652, + "logits/rejected": -0.5057946443557739, + "logps/chosen": -1.0784757137298584, + "logps/rejected": -1.2035218477249146, + "loss": 1.3556, + "rewards/accuracies": 0.4921875, + "rewards/chosen": -2.156951427459717, + "rewards/margins": 0.25009211897850037, + "rewards/rejected": -2.407043695449829, + "step": 187 + }, + { + "epoch": 0.39748926329699374, + "grad_norm": 3.1405649185180664, + "learning_rate": 7.542655593246103e-07, + "logits/chosen": -0.5316596031188965, + "logits/rejected": -0.5658366680145264, + "logps/chosen": -1.0630009174346924, + "logps/rejected": -1.2867177724838257, + "loss": 1.2612, + "rewards/accuracies": 0.5703125, + "rewards/chosen": -2.1260018348693848, + "rewards/margins": 0.447433739900589, + "rewards/rejected": -2.5734355449676514, + "step": 188 + }, + { + "epoch": 0.39960356788899903, + "grad_norm": 2.142986297607422, + "learning_rate": 7.510686935929962e-07, + "logits/chosen": -0.5959028005599976, + "logits/rejected": -0.5836039781570435, + "logps/chosen": -1.111003041267395, + "logps/rejected": -1.1858208179473877, + "loss": 1.3958, + "rewards/accuracies": 0.5390625, + "rewards/chosen": -2.22200608253479, + "rewards/margins": 0.149635449051857, + "rewards/rejected": -2.3716416358947754, + "step": 189 + }, + { + "epoch": 0.40171787248100427, + "grad_norm": 1.9227335453033447, + "learning_rate": 7.478580443900246e-07, + "logits/chosen": -0.607532799243927, + "logits/rejected": -0.6102017760276794, + "logps/chosen": -1.3353261947631836, + "logps/rejected": -1.3975369930267334, + "loss": 1.457, + "rewards/accuracies": 0.5078125, + "rewards/chosen": -2.670652389526367, + "rewards/margins": 0.12442154437303543, + "rewards/rejected": -2.795073986053467, + "step": 190 + }, + { + "epoch": 0.40383217707300956, + "grad_norm": 0.8509105443954468, + "learning_rate": 7.446337879777802e-07, + "logits/chosen": -0.5903070569038391, + "logits/rejected": -0.5728173851966858, + "logps/chosen": -1.27094566822052, + "logps/rejected": -1.3024815320968628, + "loss": 1.4953, + "rewards/accuracies": 0.5, + "rewards/chosen": -2.54189133644104, + "rewards/margins": 0.06307169049978256, + "rewards/rejected": -2.6049630641937256, + "step": 191 + }, + { + "epoch": 0.40594648166501485, + "grad_norm": 1.1561088562011719, + "learning_rate": 7.413961013653725e-07, + "logits/chosen": -0.5578102469444275, + "logits/rejected": -0.5907329320907593, + "logps/chosen": -1.3817013502120972, + "logps/rejected": -1.419295072555542, + "loss": 1.4865, + "rewards/accuracies": 0.515625, + "rewards/chosen": -2.7634027004241943, + "rewards/margins": 0.07518734782934189, + "rewards/rejected": -2.838590145111084, + "step": 192 + }, + { + "epoch": 0.40806078625702014, + "grad_norm": 8.165387153625488, + "learning_rate": 7.381451622992183e-07, + "logits/chosen": -0.5213198661804199, + "logits/rejected": -0.5392848253250122, + "logps/chosen": -1.1798306703567505, + "logps/rejected": -1.2692899703979492, + "loss": 1.3971, + "rewards/accuracies": 0.53125, + "rewards/chosen": -2.359661340713501, + "rewards/margins": 0.17891867458820343, + "rewards/rejected": -2.5385799407958984, + "step": 193 + }, + { + "epoch": 0.4101750908490254, + "grad_norm": 1.2850884199142456, + "learning_rate": 7.348811492532839e-07, + "logits/chosen": -0.5382787585258484, + "logits/rejected": -0.5274642705917358, + "logps/chosen": -1.242587685585022, + "logps/rejected": -1.272438645362854, + "loss": 1.4795, + "rewards/accuracies": 0.4921875, + "rewards/chosen": -2.485175371170044, + "rewards/margins": 0.05970197170972824, + "rewards/rejected": -2.544877290725708, + "step": 194 + }, + { + "epoch": 0.4122893954410307, + "grad_norm": 4.910929203033447, + "learning_rate": 7.316042414192864e-07, + "logits/chosen": -0.6186666488647461, + "logits/rejected": -0.6255884170532227, + "logps/chosen": -1.1743704080581665, + "logps/rejected": -1.2720146179199219, + "loss": 1.4127, + "rewards/accuracies": 0.546875, + "rewards/chosen": -2.348740816116333, + "rewards/margins": 0.19528816640377045, + "rewards/rejected": -2.5440292358398438, + "step": 195 + }, + { + "epoch": 0.414403700033036, + "grad_norm": 4.270901203155518, + "learning_rate": 7.283146186968565e-07, + "logits/chosen": -0.5861366987228394, + "logits/rejected": -0.6005197763442993, + "logps/chosen": -1.2127022743225098, + "logps/rejected": -1.3036490678787231, + "loss": 1.4067, + "rewards/accuracies": 0.546875, + "rewards/chosen": -2.4254045486450195, + "rewards/margins": 0.18189355731010437, + "rewards/rejected": -2.6072981357574463, + "step": 196 + }, + { + "epoch": 0.4165180046250413, + "grad_norm": 0.3070116639137268, + "learning_rate": 7.250124616836622e-07, + "logits/chosen": -0.6026022434234619, + "logits/rejected": -0.5920048952102661, + "logps/chosen": -1.0706496238708496, + "logps/rejected": -1.2879594564437866, + "loss": 1.2465, + "rewards/accuracies": 0.6484375, + "rewards/chosen": -2.141299247741699, + "rewards/margins": 0.4346192479133606, + "rewards/rejected": -2.5759189128875732, + "step": 197 + }, + { + "epoch": 0.4186323092170466, + "grad_norm": 1.160252571105957, + "learning_rate": 7.216979516654943e-07, + "logits/chosen": -0.5808722376823425, + "logits/rejected": -0.5770124197006226, + "logps/chosen": -1.0426011085510254, + "logps/rejected": -1.1295092105865479, + "loss": 1.4244, + "rewards/accuracies": 0.53125, + "rewards/chosen": -2.085202217102051, + "rewards/margins": 0.1738162338733673, + "rewards/rejected": -2.2590184211730957, + "step": 198 + }, + { + "epoch": 0.4207466138090519, + "grad_norm": 4.6966471672058105, + "learning_rate": 7.183712706063132e-07, + "logits/chosen": -0.5958350896835327, + "logits/rejected": -0.6440161466598511, + "logps/chosen": -0.981076717376709, + "logps/rejected": -1.1257147789001465, + "loss": 1.3175, + "rewards/accuracies": 0.6015625, + "rewards/chosen": -1.962153434753418, + "rewards/margins": 0.28927627205848694, + "rewards/rejected": -2.251429557800293, + "step": 199 + }, + { + "epoch": 0.42286091840105716, + "grad_norm": 2.9395248889923096, + "learning_rate": 7.150326011382603e-07, + "logits/chosen": -0.5647889375686646, + "logits/rejected": -0.5762943625450134, + "logps/chosen": -0.8101261854171753, + "logps/rejected": -1.0001438856124878, + "loss": 1.2135, + "rewards/accuracies": 0.6875, + "rewards/chosen": -1.6202523708343506, + "rewards/margins": 0.38003528118133545, + "rewards/rejected": -2.0002877712249756, + "step": 200 + }, + { + "epoch": 0.42497522299306245, + "grad_norm": 1.2575147151947021, + "learning_rate": 7.116821265516306e-07, + "logits/chosen": -0.5834293961524963, + "logits/rejected": -0.5929508805274963, + "logps/chosen": -0.8768399953842163, + "logps/rejected": -1.0942046642303467, + "loss": 1.219, + "rewards/accuracies": 0.6171875, + "rewards/chosen": -1.7536799907684326, + "rewards/margins": 0.43472927808761597, + "rewards/rejected": -2.1884093284606934, + "step": 201 + }, + { + "epoch": 0.42708952758506774, + "grad_norm": 1.4035751819610596, + "learning_rate": 7.083200307848115e-07, + "logits/chosen": -0.5424078106880188, + "logits/rejected": -0.5316082239151001, + "logps/chosen": -0.8791903257369995, + "logps/rejected": -0.9323580265045166, + "loss": 1.3675, + "rewards/accuracies": 0.546875, + "rewards/chosen": -1.758380651473999, + "rewards/margins": 0.10633517056703568, + "rewards/rejected": -1.8647160530090332, + "step": 202 + }, + { + "epoch": 0.42920383217707303, + "grad_norm": 1.8622503280639648, + "learning_rate": 7.049464984141829e-07, + "logits/chosen": -0.5329294204711914, + "logits/rejected": -0.5523126721382141, + "logps/chosen": -0.695776104927063, + "logps/rejected": -0.8400713801383972, + "loss": 1.2285, + "rewards/accuracies": 0.6328125, + "rewards/chosen": -1.391552209854126, + "rewards/margins": 0.28859058022499084, + "rewards/rejected": -1.6801427602767944, + "step": 203 + }, + { + "epoch": 0.4313181367690783, + "grad_norm": 0.8603182435035706, + "learning_rate": 7.015617146439861e-07, + "logits/chosen": -0.4516752064228058, + "logits/rejected": -0.46907976269721985, + "logps/chosen": -0.6868133544921875, + "logps/rejected": -0.8646677732467651, + "loss": 1.2417, + "rewards/accuracies": 0.65625, + "rewards/chosen": -1.373626708984375, + "rewards/margins": 0.355709046125412, + "rewards/rejected": -1.7293355464935303, + "step": 204 + }, + { + "epoch": 0.43343244136108355, + "grad_norm": 0.6437748670578003, + "learning_rate": 6.981658652961546e-07, + "logits/chosen": -0.6159051656723022, + "logits/rejected": -0.6000130772590637, + "logps/chosen": -0.7715178728103638, + "logps/rejected": -0.8714219331741333, + "loss": 1.3469, + "rewards/accuracies": 0.5703125, + "rewards/chosen": -1.5430357456207275, + "rewards/margins": 0.19980813562870026, + "rewards/rejected": -1.7428438663482666, + "step": 205 + }, + { + "epoch": 0.43554674595308884, + "grad_norm": 1.2309322357177734, + "learning_rate": 6.947591368001137e-07, + "logits/chosen": -0.5913614630699158, + "logits/rejected": -0.6128537654876709, + "logps/chosen": -0.7512561678886414, + "logps/rejected": -0.8872793912887573, + "loss": 1.26, + "rewards/accuracies": 0.578125, + "rewards/chosen": -1.5025123357772827, + "rewards/margins": 0.2720465660095215, + "rewards/rejected": -1.7745587825775146, + "step": 206 + }, + { + "epoch": 0.43766105054509413, + "grad_norm": 0.6153685450553894, + "learning_rate": 6.913417161825449e-07, + "logits/chosen": -0.5976595878601074, + "logits/rejected": -0.6222202181816101, + "logps/chosen": -0.837669849395752, + "logps/rejected": -0.9835771918296814, + "loss": 1.2986, + "rewards/accuracies": 0.5859375, + "rewards/chosen": -1.675339698791504, + "rewards/margins": 0.2918146252632141, + "rewards/rejected": -1.9671543836593628, + "step": 207 + }, + { + "epoch": 0.4397753551370994, + "grad_norm": 1.9922760725021362, + "learning_rate": 6.87913791057119e-07, + "logits/chosen": -0.6808818578720093, + "logits/rejected": -0.6692708730697632, + "logps/chosen": -0.7088961601257324, + "logps/rejected": -0.8256410360336304, + "loss": 1.281, + "rewards/accuracies": 0.6015625, + "rewards/chosen": -1.4177923202514648, + "rewards/margins": 0.23348984122276306, + "rewards/rejected": -1.6512820720672607, + "step": 208 + }, + { + "epoch": 0.4418896597291047, + "grad_norm": 1.9562067985534668, + "learning_rate": 6.844755496141961e-07, + "logits/chosen": -0.5282632112503052, + "logits/rejected": -0.5692226886749268, + "logps/chosen": -0.7235382795333862, + "logps/rejected": -0.801092803478241, + "loss": 1.3227, + "rewards/accuracies": 0.53125, + "rewards/chosen": -1.4470765590667725, + "rewards/margins": 0.1551089584827423, + "rewards/rejected": -1.602185606956482, + "step": 209 + }, + { + "epoch": 0.44400396432111, + "grad_norm": 0.8182584047317505, + "learning_rate": 6.81027180610493e-07, + "logits/chosen": -0.6418904662132263, + "logits/rejected": -0.5941328406333923, + "logps/chosen": -0.820648729801178, + "logps/rejected": -0.8864803910255432, + "loss": 1.3498, + "rewards/accuracies": 0.625, + "rewards/chosen": -1.641297459602356, + "rewards/margins": 0.13166317343711853, + "rewards/rejected": -1.7729607820510864, + "step": 210 + }, + { + "epoch": 0.4461182689131153, + "grad_norm": 3.075260877609253, + "learning_rate": 6.775688733587227e-07, + "logits/chosen": -0.5926809906959534, + "logits/rejected": -0.5844541788101196, + "logps/chosen": -0.7822425365447998, + "logps/rejected": -0.8866626024246216, + "loss": 1.2884, + "rewards/accuracies": 0.5859375, + "rewards/chosen": -1.5644850730895996, + "rewards/margins": 0.20884013175964355, + "rewards/rejected": -1.7733252048492432, + "step": 211 + }, + { + "epoch": 0.4482325735051206, + "grad_norm": 0.8032744526863098, + "learning_rate": 6.741008177171993e-07, + "logits/chosen": -0.579971432685852, + "logits/rejected": -0.5978566408157349, + "logps/chosen": -0.721234917640686, + "logps/rejected": -0.8368514180183411, + "loss": 1.2781, + "rewards/accuracies": 0.6171875, + "rewards/chosen": -1.442469835281372, + "rewards/margins": 0.23123310506343842, + "rewards/rejected": -1.6737028360366821, + "step": 212 + }, + { + "epoch": 0.45034687809712587, + "grad_norm": 0.6680911779403687, + "learning_rate": 6.706232040794161e-07, + "logits/chosen": -0.6748596429824829, + "logits/rejected": -0.6615546941757202, + "logps/chosen": -0.7931480407714844, + "logps/rejected": -0.8879257440567017, + "loss": 1.337, + "rewards/accuracies": 0.546875, + "rewards/chosen": -1.5862960815429688, + "rewards/margins": 0.1895553171634674, + "rewards/rejected": -1.7758514881134033, + "step": 213 + }, + { + "epoch": 0.45246118268913116, + "grad_norm": 2.5107688903808594, + "learning_rate": 6.671362233635925e-07, + "logits/chosen": -0.6460363268852234, + "logits/rejected": -0.6273557543754578, + "logps/chosen": -0.823783814907074, + "logps/rejected": -0.87412428855896, + "loss": 1.3756, + "rewards/accuracies": 0.53125, + "rewards/chosen": -1.647567629814148, + "rewards/margins": 0.10068092495203018, + "rewards/rejected": -1.74824857711792, + "step": 214 + }, + { + "epoch": 0.45457548728113645, + "grad_norm": 2.2206740379333496, + "learning_rate": 6.636400670021933e-07, + "logits/chosen": -0.6295229196548462, + "logits/rejected": -0.6330893039703369, + "logps/chosen": -0.807812511920929, + "logps/rejected": -0.9784457683563232, + "loss": 1.2259, + "rewards/accuracies": 0.6484375, + "rewards/chosen": -1.615625023841858, + "rewards/margins": 0.3412665128707886, + "rewards/rejected": -1.9568915367126465, + "step": 215 + }, + { + "epoch": 0.45668979187314174, + "grad_norm": 1.2925803661346436, + "learning_rate": 6.601349269314187e-07, + "logits/chosen": -0.6001027822494507, + "logits/rejected": -0.6305864453315735, + "logps/chosen": -0.7216315865516663, + "logps/rejected": -0.8616191744804382, + "loss": 1.269, + "rewards/accuracies": 0.65625, + "rewards/chosen": -1.4432631731033325, + "rewards/margins": 0.2799749970436096, + "rewards/rejected": -1.7232383489608765, + "step": 216 + }, + { + "epoch": 0.458804096465147, + "grad_norm": 4.863992214202881, + "learning_rate": 6.566209955806679e-07, + "logits/chosen": -0.5307935476303101, + "logits/rejected": -0.5385264754295349, + "logps/chosen": -0.8053566813468933, + "logps/rejected": -0.9241464734077454, + "loss": 1.3325, + "rewards/accuracies": 0.546875, + "rewards/chosen": -1.6107133626937866, + "rewards/margins": 0.23757943511009216, + "rewards/rejected": -1.8482929468154907, + "step": 217 + }, + { + "epoch": 0.4609184010571523, + "grad_norm": 1.0189604759216309, + "learning_rate": 6.530984658619733e-07, + "logits/chosen": -0.7031885385513306, + "logits/rejected": -0.7072005867958069, + "logps/chosen": -0.8382629752159119, + "logps/rejected": -0.9468755722045898, + "loss": 1.3276, + "rewards/accuracies": 0.5546875, + "rewards/chosen": -1.6765259504318237, + "rewards/margins": 0.21722503006458282, + "rewards/rejected": -1.8937511444091797, + "step": 218 + }, + { + "epoch": 0.4630327056491576, + "grad_norm": 1.1178699731826782, + "learning_rate": 6.495675311594122e-07, + "logits/chosen": -0.5736142992973328, + "logits/rejected": -0.5926069021224976, + "logps/chosen": -0.7676032781600952, + "logps/rejected": -0.9179919958114624, + "loss": 1.278, + "rewards/accuracies": 0.6015625, + "rewards/chosen": -1.5352065563201904, + "rewards/margins": 0.3007773756980896, + "rewards/rejected": -1.8359839916229248, + "step": 219 + }, + { + "epoch": 0.4651470102411629, + "grad_norm": 2.4985287189483643, + "learning_rate": 6.460283853184879e-07, + "logits/chosen": -0.6372602581977844, + "logits/rejected": -0.6313104033470154, + "logps/chosen": -0.8754556179046631, + "logps/rejected": -0.9803894758224487, + "loss": 1.3166, + "rewards/accuracies": 0.6328125, + "rewards/chosen": -1.7509112358093262, + "rewards/margins": 0.2098677009344101, + "rewards/rejected": -1.9607789516448975, + "step": 220 + }, + { + "epoch": 0.46726131483316813, + "grad_norm": 1.5675435066223145, + "learning_rate": 6.424812226354889e-07, + "logits/chosen": -0.6377983093261719, + "logits/rejected": -0.6666730642318726, + "logps/chosen": -0.7556843757629395, + "logps/rejected": -0.9096466302871704, + "loss": 1.2397, + "rewards/accuracies": 0.6796875, + "rewards/chosen": -1.511368751525879, + "rewards/margins": 0.30792441964149475, + "rewards/rejected": -1.8192932605743408, + "step": 221 + }, + { + "epoch": 0.4693756194251734, + "grad_norm": 2.853426218032837, + "learning_rate": 6.389262378468219e-07, + "logits/chosen": -0.6055567860603333, + "logits/rejected": -0.612144947052002, + "logps/chosen": -0.8588352203369141, + "logps/rejected": -0.8928595185279846, + "loss": 1.4022, + "rewards/accuracies": 0.5078125, + "rewards/chosen": -1.7176704406738281, + "rewards/margins": 0.06804870069026947, + "rewards/rejected": -1.7857190370559692, + "step": 222 + }, + { + "epoch": 0.4714899240171787, + "grad_norm": 0.528042733669281, + "learning_rate": 6.353636261183213e-07, + "logits/chosen": -0.6543641090393066, + "logits/rejected": -0.6635830402374268, + "logps/chosen": -0.7858147621154785, + "logps/rejected": -0.9400445222854614, + "loss": 1.2446, + "rewards/accuracies": 0.640625, + "rewards/chosen": -1.571629524230957, + "rewards/margins": 0.3084595203399658, + "rewards/rejected": -1.8800890445709229, + "step": 223 + }, + { + "epoch": 0.473604228609184, + "grad_norm": 1.1155768632888794, + "learning_rate": 6.317935830345338e-07, + "logits/chosen": -0.5700349807739258, + "logits/rejected": -0.6560614705085754, + "logps/chosen": -0.8426170945167542, + "logps/rejected": -0.9983471035957336, + "loss": 1.3204, + "rewards/accuracies": 0.578125, + "rewards/chosen": -1.6852341890335083, + "rewards/margins": 0.3114599883556366, + "rewards/rejected": -1.9966942071914673, + "step": 224 + }, + { + "epoch": 0.4757185332011893, + "grad_norm": 0.802669107913971, + "learning_rate": 6.282163045879823e-07, + "logits/chosen": -0.6912901401519775, + "logits/rejected": -0.7201069593429565, + "logps/chosen": -0.8135342597961426, + "logps/rejected": -0.9537283182144165, + "loss": 1.2961, + "rewards/accuracies": 0.5859375, + "rewards/chosen": -1.6270685195922852, + "rewards/margins": 0.2803882658481598, + "rewards/rejected": -1.907456636428833, + "step": 225 + }, + { + "epoch": 0.4778328377931946, + "grad_norm": 1.709757924079895, + "learning_rate": 6.246319871684047e-07, + "logits/chosen": -0.7573816776275635, + "logits/rejected": -0.8028420209884644, + "logps/chosen": -0.891952633857727, + "logps/rejected": -1.0168029069900513, + "loss": 1.333, + "rewards/accuracies": 0.6015625, + "rewards/chosen": -1.783905267715454, + "rewards/margins": 0.24970072507858276, + "rewards/rejected": -2.0336058139801025, + "step": 226 + }, + { + "epoch": 0.47994714238519987, + "grad_norm": 2.170957326889038, + "learning_rate": 6.210408275519734e-07, + "logits/chosen": -0.6915597915649414, + "logits/rejected": -0.7027997970581055, + "logps/chosen": -0.9063036441802979, + "logps/rejected": -1.0104373693466187, + "loss": 1.3388, + "rewards/accuracies": 0.5390625, + "rewards/chosen": -1.8126072883605957, + "rewards/margins": 0.20826762914657593, + "rewards/rejected": -2.0208747386932373, + "step": 227 + }, + { + "epoch": 0.48206144697720515, + "grad_norm": 1.8802261352539062, + "learning_rate": 6.174430228904919e-07, + "logits/chosen": -0.689726710319519, + "logits/rejected": -0.7143282890319824, + "logps/chosen": -0.7480812072753906, + "logps/rejected": -0.8698041439056396, + "loss": 1.2836, + "rewards/accuracies": 0.6171875, + "rewards/chosen": -1.4961624145507812, + "rewards/margins": 0.24344584345817566, + "rewards/rejected": -1.7396082878112793, + "step": 228 + }, + { + "epoch": 0.48417575156921044, + "grad_norm": 2.5202934741973877, + "learning_rate": 6.13838770700571e-07, + "logits/chosen": -0.6858299374580383, + "logits/rejected": -0.7115206122398376, + "logps/chosen": -0.8575515151023865, + "logps/rejected": -0.9657347202301025, + "loss": 1.3046, + "rewards/accuracies": 0.546875, + "rewards/chosen": -1.715103030204773, + "rewards/margins": 0.21636635065078735, + "rewards/rejected": -1.931469440460205, + "step": 229 + }, + { + "epoch": 0.48629005616121573, + "grad_norm": 1.268512487411499, + "learning_rate": 6.102282688527859e-07, + "logits/chosen": -0.7078689932823181, + "logits/rejected": -0.7254161238670349, + "logps/chosen": -0.8850880861282349, + "logps/rejected": -1.031385898590088, + "loss": 1.2816, + "rewards/accuracies": 0.625, + "rewards/chosen": -1.7701761722564697, + "rewards/margins": 0.29259535670280457, + "rewards/rejected": -2.062771797180176, + "step": 230 + }, + { + "epoch": 0.488404360753221, + "grad_norm": 1.7285584211349487, + "learning_rate": 6.066117155608135e-07, + "logits/chosen": -0.7325868606567383, + "logits/rejected": -0.7433226108551025, + "logps/chosen": -0.8014956116676331, + "logps/rejected": -0.9653260111808777, + "loss": 1.2429, + "rewards/accuracies": 0.6328125, + "rewards/chosen": -1.6029912233352661, + "rewards/margins": 0.32766085863113403, + "rewards/rejected": -1.9306520223617554, + "step": 231 + }, + { + "epoch": 0.4905186653452263, + "grad_norm": 0.6270304322242737, + "learning_rate": 6.029893093705491e-07, + "logits/chosen": -0.692166805267334, + "logits/rejected": -0.6799293756484985, + "logps/chosen": -0.7850213646888733, + "logps/rejected": -0.8839574456214905, + "loss": 1.2967, + "rewards/accuracies": 0.640625, + "rewards/chosen": -1.5700427293777466, + "rewards/margins": 0.19787229597568512, + "rewards/rejected": -1.767914891242981, + "step": 232 + }, + { + "epoch": 0.4926329699372316, + "grad_norm": 1.0160484313964844, + "learning_rate": 5.993612491492087e-07, + "logits/chosen": -0.7095844149589539, + "logits/rejected": -0.71524578332901, + "logps/chosen": -0.7063854336738586, + "logps/rejected": -0.8855549097061157, + "loss": 1.2176, + "rewards/accuracies": 0.6328125, + "rewards/chosen": -1.4127708673477173, + "rewards/margins": 0.3583390712738037, + "rewards/rejected": -1.7711098194122314, + "step": 233 + }, + { + "epoch": 0.4947472745292369, + "grad_norm": 2.225841999053955, + "learning_rate": 5.957277340744094e-07, + "logits/chosen": -0.7488946318626404, + "logits/rejected": -0.7588428854942322, + "logps/chosen": -0.9203822612762451, + "logps/rejected": -1.0089298486709595, + "loss": 1.355, + "rewards/accuracies": 0.5390625, + "rewards/chosen": -1.8407645225524902, + "rewards/margins": 0.17709502577781677, + "rewards/rejected": -2.017859697341919, + "step": 234 + }, + { + "epoch": 0.4968615791212422, + "grad_norm": 1.9577795267105103, + "learning_rate": 5.920889636232351e-07, + "logits/chosen": -0.8078997731208801, + "logits/rejected": -0.8064825534820557, + "logps/chosen": -0.8004480004310608, + "logps/rejected": -0.9856831431388855, + "loss": 1.2273, + "rewards/accuracies": 0.6640625, + "rewards/chosen": -1.6008960008621216, + "rewards/margins": 0.3704703152179718, + "rewards/rejected": -1.971366286277771, + "step": 235 + }, + { + "epoch": 0.4989758837132474, + "grad_norm": 2.5050246715545654, + "learning_rate": 5.884451375612865e-07, + "logits/chosen": -0.7499472498893738, + "logits/rejected": -0.7421904802322388, + "logps/chosen": -0.8363584876060486, + "logps/rejected": -0.9543781876564026, + "loss": 1.3002, + "rewards/accuracies": 0.5390625, + "rewards/chosen": -1.6727169752120972, + "rewards/margins": 0.23603934049606323, + "rewards/rejected": -1.9087563753128052, + "step": 236 + }, + { + "epoch": 0.5010901883052528, + "grad_norm": 0.585436224937439, + "learning_rate": 5.847964559317128e-07, + "logits/chosen": -0.730015218257904, + "logits/rejected": -0.7154791355133057, + "logps/chosen": -0.8828849196434021, + "logps/rejected": -0.9897070527076721, + "loss": 1.347, + "rewards/accuracies": 0.578125, + "rewards/chosen": -1.7657698392868042, + "rewards/margins": 0.21364440023899078, + "rewards/rejected": -1.9794141054153442, + "step": 237 + }, + { + "epoch": 0.503204492897258, + "grad_norm": 0.9204092621803284, + "learning_rate": 5.8114311904423e-07, + "logits/chosen": -0.759974479675293, + "logits/rejected": -0.7793674468994141, + "logps/chosen": -0.8321584463119507, + "logps/rejected": -1.0809751749038696, + "loss": 1.2185, + "rewards/accuracies": 0.6875, + "rewards/chosen": -1.6643168926239014, + "rewards/margins": 0.4976334273815155, + "rewards/rejected": -2.1619503498077393, + "step": 238 + }, + { + "epoch": 0.5053187974892633, + "grad_norm": 5.147011756896973, + "learning_rate": 5.774853274641243e-07, + "logits/chosen": -0.7148956060409546, + "logits/rejected": -0.7363921403884888, + "logps/chosen": -0.8623124361038208, + "logps/rejected": -1.0681498050689697, + "loss": 1.2353, + "rewards/accuracies": 0.6484375, + "rewards/chosen": -1.7246248722076416, + "rewards/margins": 0.4116746187210083, + "rewards/rejected": -2.1362996101379395, + "step": 239 + }, + { + "epoch": 0.5074331020812686, + "grad_norm": 1.9065529108047485, + "learning_rate": 5.738232820012407e-07, + "logits/chosen": -0.7158540487289429, + "logits/rejected": -0.7083900570869446, + "logps/chosen": -0.981558620929718, + "logps/rejected": -1.054612636566162, + "loss": 1.3594, + "rewards/accuracies": 0.59375, + "rewards/chosen": -1.963117241859436, + "rewards/margins": 0.14610806107521057, + "rewards/rejected": -2.109225273132324, + "step": 240 + }, + { + "epoch": 0.5095474066732739, + "grad_norm": 2.4411256313323975, + "learning_rate": 5.701571836989591e-07, + "logits/chosen": -0.8441444039344788, + "logits/rejected": -0.8529233336448669, + "logps/chosen": -0.8665949702262878, + "logps/rejected": -1.030572772026062, + "loss": 1.2477, + "rewards/accuracies": 0.671875, + "rewards/chosen": -1.7331899404525757, + "rewards/margins": 0.3279556334018707, + "rewards/rejected": -2.061145544052124, + "step": 241 + }, + { + "epoch": 0.5116617112652792, + "grad_norm": 2.461113214492798, + "learning_rate": 5.664872338231571e-07, + "logits/chosen": -0.7463312149047852, + "logits/rejected": -0.7725105285644531, + "logps/chosen": -0.9185941815376282, + "logps/rejected": -1.1244423389434814, + "loss": 1.2404, + "rewards/accuracies": 0.6328125, + "rewards/chosen": -1.8371883630752563, + "rewards/margins": 0.411696195602417, + "rewards/rejected": -2.248884677886963, + "step": 242 + }, + { + "epoch": 0.5137760158572844, + "grad_norm": 3.5861761569976807, + "learning_rate": 5.628136338511607e-07, + "logits/chosen": -0.8432914018630981, + "logits/rejected": -0.85801100730896, + "logps/chosen": -0.8873915672302246, + "logps/rejected": -1.0090795755386353, + "loss": 1.3072, + "rewards/accuracies": 0.609375, + "rewards/chosen": -1.7747831344604492, + "rewards/margins": 0.24337637424468994, + "rewards/rejected": -2.0181591510772705, + "step": 243 + }, + { + "epoch": 0.5158903204492897, + "grad_norm": 2.109071969985962, + "learning_rate": 5.591365854606829e-07, + "logits/chosen": -0.7899532318115234, + "logits/rejected": -0.7548331618309021, + "logps/chosen": -0.9333330392837524, + "logps/rejected": -1.00949227809906, + "loss": 1.3749, + "rewards/accuracies": 0.59375, + "rewards/chosen": -1.8666660785675049, + "rewards/margins": 0.1523183286190033, + "rewards/rejected": -2.01898455619812, + "step": 244 + }, + { + "epoch": 0.518004625041295, + "grad_norm": 2.2017955780029297, + "learning_rate": 5.554562905187527e-07, + "logits/chosen": -0.7569047212600708, + "logits/rejected": -0.7679808735847473, + "logps/chosen": -0.9779613614082336, + "logps/rejected": -1.1713427305221558, + "loss": 1.2628, + "rewards/accuracies": 0.609375, + "rewards/chosen": -1.9559227228164673, + "rewards/margins": 0.3867628276348114, + "rewards/rejected": -2.3426854610443115, + "step": 245 + }, + { + "epoch": 0.5201189296333003, + "grad_norm": 4.651991367340088, + "learning_rate": 5.517729510706315e-07, + "logits/chosen": -0.8546395301818848, + "logits/rejected": -0.8609369397163391, + "logps/chosen": -0.9926605224609375, + "logps/rejected": -1.1553713083267212, + "loss": 1.2812, + "rewards/accuracies": 0.6171875, + "rewards/chosen": -1.985321044921875, + "rewards/margins": 0.32542160153388977, + "rewards/rejected": -2.3107426166534424, + "step": 246 + }, + { + "epoch": 0.5222332342253055, + "grad_norm": 2.6384060382843018, + "learning_rate": 5.480867693287223e-07, + "logits/chosen": -0.7734386324882507, + "logits/rejected": -0.7963250875473022, + "logps/chosen": -0.8996341824531555, + "logps/rejected": -1.0466523170471191, + "loss": 1.2849, + "rewards/accuracies": 0.578125, + "rewards/chosen": -1.799268364906311, + "rewards/margins": 0.2940361201763153, + "rewards/rejected": -2.0933046340942383, + "step": 247 + }, + { + "epoch": 0.5243475388173109, + "grad_norm": 1.3608977794647217, + "learning_rate": 5.443979476614674e-07, + "logits/chosen": -0.7350472807884216, + "logits/rejected": -0.7215992212295532, + "logps/chosen": -0.8887076377868652, + "logps/rejected": -1.0147045850753784, + "loss": 1.3182, + "rewards/accuracies": 0.6171875, + "rewards/chosen": -1.7774152755737305, + "rewards/margins": 0.25199398398399353, + "rewards/rejected": -2.029409170150757, + "step": 248 + }, + { + "epoch": 0.5264618434093161, + "grad_norm": 3.017115354537964, + "learning_rate": 5.407066885822391e-07, + "logits/chosen": -0.827782154083252, + "logits/rejected": -0.8471929430961609, + "logps/chosen": -0.9262440800666809, + "logps/rejected": -1.1658306121826172, + "loss": 1.1882, + "rewards/accuracies": 0.6640625, + "rewards/chosen": -1.8524881601333618, + "rewards/margins": 0.47917306423187256, + "rewards/rejected": -2.3316612243652344, + "step": 249 + }, + { + "epoch": 0.5285761480013215, + "grad_norm": 0.7805312275886536, + "learning_rate": 5.370131947382214e-07, + "logits/chosen": -0.7815499305725098, + "logits/rejected": -0.8279274702072144, + "logps/chosen": -0.968708872795105, + "logps/rejected": -1.2697322368621826, + "loss": 1.2092, + "rewards/accuracies": 0.671875, + "rewards/chosen": -1.93741774559021, + "rewards/margins": 0.6020466685295105, + "rewards/rejected": -2.5394644737243652, + "step": 250 + }, + { + "epoch": 0.5306904525933267, + "grad_norm": 2.229363441467285, + "learning_rate": 5.333176688992855e-07, + "logits/chosen": -0.7824153900146484, + "logits/rejected": -0.8154900074005127, + "logps/chosen": -1.0211957693099976, + "logps/rejected": -1.2145965099334717, + "loss": 1.3074, + "rewards/accuracies": 0.609375, + "rewards/chosen": -2.042391538619995, + "rewards/margins": 0.3868010938167572, + "rewards/rejected": -2.4291930198669434, + "step": 251 + }, + { + "epoch": 0.532804757185332, + "grad_norm": 1.1359837055206299, + "learning_rate": 5.296203139468571e-07, + "logits/chosen": -0.7467613220214844, + "logits/rejected": -0.7548531889915466, + "logps/chosen": -1.0614902973175049, + "logps/rejected": -1.2674376964569092, + "loss": 1.2512, + "rewards/accuracies": 0.578125, + "rewards/chosen": -2.1229805946350098, + "rewards/margins": 0.4118950664997101, + "rewards/rejected": -2.5348753929138184, + "step": 252 + }, + { + "epoch": 0.5349190617773373, + "grad_norm": 3.0548548698425293, + "learning_rate": 5.259213328627792e-07, + "logits/chosen": -0.7868636250495911, + "logits/rejected": -0.8130850791931152, + "logps/chosen": -1.0743666887283325, + "logps/rejected": -1.2010191679000854, + "loss": 1.3275, + "rewards/accuracies": 0.578125, + "rewards/chosen": -2.148733377456665, + "rewards/margins": 0.2533051669597626, + "rewards/rejected": -2.402038335800171, + "step": 253 + }, + { + "epoch": 0.5370333663693426, + "grad_norm": 1.7205246686935425, + "learning_rate": 5.222209287181676e-07, + "logits/chosen": -0.81404709815979, + "logits/rejected": -0.8481613397598267, + "logps/chosen": -1.1599587202072144, + "logps/rejected": -1.4234716892242432, + "loss": 1.2894, + "rewards/accuracies": 0.6015625, + "rewards/chosen": -2.3199174404144287, + "rewards/margins": 0.5270256400108337, + "rewards/rejected": -2.8469433784484863, + "step": 254 + }, + { + "epoch": 0.5391476709613479, + "grad_norm": 2.2516112327575684, + "learning_rate": 5.185193046622634e-07, + "logits/chosen": -0.8112510442733765, + "logits/rejected": -0.8310728073120117, + "logps/chosen": -1.1263186931610107, + "logps/rejected": -1.3256827592849731, + "loss": 1.3552, + "rewards/accuracies": 0.5390625, + "rewards/chosen": -2.2526373863220215, + "rewards/margins": 0.39872825145721436, + "rewards/rejected": -2.6513655185699463, + "step": 255 + }, + { + "epoch": 0.5412619755533532, + "grad_norm": 2.8379359245300293, + "learning_rate": 5.148166639112799e-07, + "logits/chosen": -0.8202102184295654, + "logits/rejected": -0.845209002494812, + "logps/chosen": -1.264180302619934, + "logps/rejected": -1.6190590858459473, + "loss": 1.2083, + "rewards/accuracies": 0.625, + "rewards/chosen": -2.528360605239868, + "rewards/margins": 0.7097575068473816, + "rewards/rejected": -3.2381181716918945, + "step": 256 + }, + { + "epoch": 0.5433762801453584, + "grad_norm": 4.676355838775635, + "learning_rate": 5.111132097372459e-07, + "logits/chosen": -0.8866451978683472, + "logits/rejected": -0.8642281889915466, + "logps/chosen": -1.3194389343261719, + "logps/rejected": -1.4506916999816895, + "loss": 1.4002, + "rewards/accuracies": 0.59375, + "rewards/chosen": -2.6388778686523438, + "rewards/margins": 0.2625058591365814, + "rewards/rejected": -2.901383399963379, + "step": 257 + }, + { + "epoch": 0.5454905847373638, + "grad_norm": 2.55251407623291, + "learning_rate": 5.074091454568463e-07, + "logits/chosen": -0.7903708815574646, + "logits/rejected": -0.8010709881782532, + "logps/chosen": -1.3550961017608643, + "logps/rejected": -1.661428451538086, + "loss": 1.2131, + "rewards/accuracies": 0.65625, + "rewards/chosen": -2.7101922035217285, + "rewards/margins": 0.6126645803451538, + "rewards/rejected": -3.322856903076172, + "step": 258 + }, + { + "epoch": 0.547604889329369, + "grad_norm": 4.116572856903076, + "learning_rate": 5.037046744202611e-07, + "logits/chosen": -0.7501232624053955, + "logits/rejected": -0.7825176119804382, + "logps/chosen": -1.2111856937408447, + "logps/rejected": -1.5176191329956055, + "loss": 1.1345, + "rewards/accuracies": 0.71875, + "rewards/chosen": -2.4223713874816895, + "rewards/margins": 0.6128667593002319, + "rewards/rejected": -3.035238265991211, + "step": 259 + }, + { + "epoch": 0.5497191939213742, + "grad_norm": 2.0285205841064453, + "learning_rate": 5e-07, + "logits/chosen": -0.8355445861816406, + "logits/rejected": -0.8497716784477234, + "logps/chosen": -1.1876304149627686, + "logps/rejected": -1.4788450002670288, + "loss": 1.1559, + "rewards/accuracies": 0.671875, + "rewards/chosen": -2.375260829925537, + "rewards/margins": 0.5824294090270996, + "rewards/rejected": -2.9576900005340576, + "step": 260 + }, + { + "epoch": 0.5518334985133796, + "grad_norm": 4.681185245513916, + "learning_rate": 4.962953255797389e-07, + "logits/chosen": -0.8240503072738647, + "logits/rejected": -0.8016488552093506, + "logps/chosen": -1.2238959074020386, + "logps/rejected": -1.4727882146835327, + "loss": 1.2914, + "rewards/accuracies": 0.5546875, + "rewards/chosen": -2.447791814804077, + "rewards/margins": 0.49778467416763306, + "rewards/rejected": -2.9455764293670654, + "step": 261 + }, + { + "epoch": 0.5539478031053848, + "grad_norm": 5.15679931640625, + "learning_rate": 4.925908545431537e-07, + "logits/chosen": -0.728940486907959, + "logits/rejected": -0.7355924248695374, + "logps/chosen": -1.3356778621673584, + "logps/rejected": -1.6726096868515015, + "loss": 1.1434, + "rewards/accuracies": 0.6953125, + "rewards/chosen": -2.671355724334717, + "rewards/margins": 0.6738637685775757, + "rewards/rejected": -3.345219373703003, + "step": 262 + }, + { + "epoch": 0.5560621076973902, + "grad_norm": 2.481048345565796, + "learning_rate": 4.888867902627543e-07, + "logits/chosen": -0.8311591148376465, + "logits/rejected": -0.8191719055175781, + "logps/chosen": -1.2743335962295532, + "logps/rejected": -1.5339927673339844, + "loss": 1.2164, + "rewards/accuracies": 0.6640625, + "rewards/chosen": -2.5486671924591064, + "rewards/margins": 0.5193185210227966, + "rewards/rejected": -3.0679855346679688, + "step": 263 + }, + { + "epoch": 0.5581764122893954, + "grad_norm": 3.6758291721343994, + "learning_rate": 4.851833360887201e-07, + "logits/chosen": -0.6787989735603333, + "logits/rejected": -0.668928325176239, + "logps/chosen": -1.2278664112091064, + "logps/rejected": -1.4955706596374512, + "loss": 1.1942, + "rewards/accuracies": 0.7109375, + "rewards/chosen": -2.455732822418213, + "rewards/margins": 0.535408616065979, + "rewards/rejected": -2.9911413192749023, + "step": 264 + }, + { + "epoch": 0.5602907168814008, + "grad_norm": 2.7282023429870605, + "learning_rate": 4.814806953377365e-07, + "logits/chosen": -0.7772133350372314, + "logits/rejected": -0.7689889073371887, + "logps/chosen": -1.1954048871994019, + "logps/rejected": -1.444943904876709, + "loss": 1.2686, + "rewards/accuracies": 0.59375, + "rewards/chosen": -2.3908097743988037, + "rewards/margins": 0.4990782141685486, + "rewards/rejected": -2.889887809753418, + "step": 265 + }, + { + "epoch": 0.562405021473406, + "grad_norm": 2.8753116130828857, + "learning_rate": 4.777790712818323e-07, + "logits/chosen": -0.6946043968200684, + "logits/rejected": -0.7001516819000244, + "logps/chosen": -1.2844620943069458, + "logps/rejected": -1.486103892326355, + "loss": 1.284, + "rewards/accuracies": 0.5703125, + "rewards/chosen": -2.5689241886138916, + "rewards/margins": 0.4032836854457855, + "rewards/rejected": -2.97220778465271, + "step": 266 + }, + { + "epoch": 0.5645193260654113, + "grad_norm": 1.5583593845367432, + "learning_rate": 4.740786671372209e-07, + "logits/chosen": -0.7396820187568665, + "logits/rejected": -0.7129873037338257, + "logps/chosen": -1.410097599029541, + "logps/rejected": -1.6091456413269043, + "loss": 1.3158, + "rewards/accuracies": 0.640625, + "rewards/chosen": -2.820195198059082, + "rewards/margins": 0.3980959951877594, + "rewards/rejected": -3.2182912826538086, + "step": 267 + }, + { + "epoch": 0.5666336306574166, + "grad_norm": 3.5984952449798584, + "learning_rate": 4.703796860531429e-07, + "logits/chosen": -0.7031830549240112, + "logits/rejected": -0.700330376625061, + "logps/chosen": -1.633664608001709, + "logps/rejected": -1.9186874628067017, + "loss": 1.2479, + "rewards/accuracies": 0.6796875, + "rewards/chosen": -3.267329216003418, + "rewards/margins": 0.5700456500053406, + "rewards/rejected": -3.8373749256134033, + "step": 268 + }, + { + "epoch": 0.5687479352494219, + "grad_norm": 6.295733451843262, + "learning_rate": 4.666823311007144e-07, + "logits/chosen": -0.8001950979232788, + "logits/rejected": -0.8042099475860596, + "logps/chosen": -1.5675832033157349, + "logps/rejected": -1.9247075319290161, + "loss": 1.1759, + "rewards/accuracies": 0.625, + "rewards/chosen": -3.1351664066314697, + "rewards/margins": 0.7142485976219177, + "rewards/rejected": -3.8494150638580322, + "step": 269 + }, + { + "epoch": 0.5708622398414271, + "grad_norm": 3.6349036693573, + "learning_rate": 4.6298680526177855e-07, + "logits/chosen": -0.8108068704605103, + "logits/rejected": -0.8030902147293091, + "logps/chosen": -1.8205997943878174, + "logps/rejected": -2.195197105407715, + "loss": 1.1864, + "rewards/accuracies": 0.6796875, + "rewards/chosen": -3.6411995887756348, + "rewards/margins": 0.7491948008537292, + "rewards/rejected": -4.39039421081543, + "step": 270 + }, + { + "epoch": 0.5729765444334325, + "grad_norm": 4.786395072937012, + "learning_rate": 4.59293311417761e-07, + "logits/chosen": -0.798182487487793, + "logits/rejected": -0.7736828327178955, + "logps/chosen": -1.8617057800292969, + "logps/rejected": -2.08984637260437, + "loss": 1.3947, + "rewards/accuracies": 0.6328125, + "rewards/chosen": -3.7234115600585938, + "rewards/margins": 0.4562810957431793, + "rewards/rejected": -4.17969274520874, + "step": 271 + }, + { + "epoch": 0.5750908490254377, + "grad_norm": 6.7946457862854, + "learning_rate": 4.556020523385326e-07, + "logits/chosen": -0.7530428171157837, + "logits/rejected": -0.7395590543746948, + "logps/chosen": -1.8709862232208252, + "logps/rejected": -2.3599390983581543, + "loss": 1.1025, + "rewards/accuracies": 0.6875, + "rewards/chosen": -3.7419724464416504, + "rewards/margins": 0.9779053926467896, + "rewards/rejected": -4.719878196716309, + "step": 272 + }, + { + "epoch": 0.5772051536174431, + "grad_norm": 4.877624988555908, + "learning_rate": 4.5191323067127773e-07, + "logits/chosen": -0.7732480764389038, + "logits/rejected": -0.7835702300071716, + "logps/chosen": -2.0340800285339355, + "logps/rejected": -2.330742835998535, + "loss": 1.3198, + "rewards/accuracies": 0.6328125, + "rewards/chosen": -4.068160057067871, + "rewards/margins": 0.5933258533477783, + "rewards/rejected": -4.66148567199707, + "step": 273 + }, + { + "epoch": 0.5793194582094483, + "grad_norm": 9.001680374145508, + "learning_rate": 4.482270489293685e-07, + "logits/chosen": -0.9062263369560242, + "logits/rejected": -0.9105854630470276, + "logps/chosen": -2.1364972591400146, + "logps/rejected": -2.4467523097991943, + "loss": 1.3464, + "rewards/accuracies": 0.6328125, + "rewards/chosen": -4.272994518280029, + "rewards/margins": 0.6205099821090698, + "rewards/rejected": -4.893504619598389, + "step": 274 + }, + { + "epoch": 0.5814337628014535, + "grad_norm": 2.811025619506836, + "learning_rate": 4.445437094812475e-07, + "logits/chosen": -0.8593579530715942, + "logits/rejected": -0.8343831896781921, + "logps/chosen": -2.452843189239502, + "logps/rejected": -2.7551848888397217, + "loss": 1.3536, + "rewards/accuracies": 0.5859375, + "rewards/chosen": -4.905686378479004, + "rewards/margins": 0.6046838760375977, + "rewards/rejected": -5.510369777679443, + "step": 275 + }, + { + "epoch": 0.5835480673934589, + "grad_norm": 2.2030158042907715, + "learning_rate": 4.4086341453931714e-07, + "logits/chosen": -0.8991417288780212, + "logits/rejected": -0.8766486644744873, + "logps/chosen": -2.30641508102417, + "logps/rejected": -2.7606654167175293, + "loss": 1.1708, + "rewards/accuracies": 0.6875, + "rewards/chosen": -4.61283016204834, + "rewards/margins": 0.9085015654563904, + "rewards/rejected": -5.521330833435059, + "step": 276 + }, + { + "epoch": 0.5856623719854641, + "grad_norm": 5.5185227394104, + "learning_rate": 4.371863661488393e-07, + "logits/chosen": -0.8738227486610413, + "logits/rejected": -0.8665530681610107, + "logps/chosen": -2.29125714302063, + "logps/rejected": -2.7014153003692627, + "loss": 1.1883, + "rewards/accuracies": 0.6484375, + "rewards/chosen": -4.58251428604126, + "rewards/margins": 0.8203167915344238, + "rewards/rejected": -5.402830600738525, + "step": 277 + }, + { + "epoch": 0.5877766765774695, + "grad_norm": 2.0779521465301514, + "learning_rate": 4.3351276617684285e-07, + "logits/chosen": -0.958415150642395, + "logits/rejected": -0.9585077166557312, + "logps/chosen": -2.4368410110473633, + "logps/rejected": -2.798506736755371, + "loss": 1.1749, + "rewards/accuracies": 0.6484375, + "rewards/chosen": -4.873682022094727, + "rewards/margins": 0.7233313322067261, + "rewards/rejected": -5.597013473510742, + "step": 278 + }, + { + "epoch": 0.5898909811694747, + "grad_norm": 2.884877920150757, + "learning_rate": 4.29842816301041e-07, + "logits/chosen": -0.9413051605224609, + "logits/rejected": -0.9224691987037659, + "logps/chosen": -2.485034942626953, + "logps/rejected": -2.911332368850708, + "loss": 1.2035, + "rewards/accuracies": 0.6171875, + "rewards/chosen": -4.970069885253906, + "rewards/margins": 0.8525944948196411, + "rewards/rejected": -5.822664737701416, + "step": 279 + }, + { + "epoch": 0.59200528576148, + "grad_norm": 5.203248500823975, + "learning_rate": 4.2617671799875944e-07, + "logits/chosen": -0.9359334111213684, + "logits/rejected": -0.9387660026550293, + "logps/chosen": -2.378349542617798, + "logps/rejected": -2.730886936187744, + "loss": 1.2253, + "rewards/accuracies": 0.7109375, + "rewards/chosen": -4.756699085235596, + "rewards/margins": 0.7050745487213135, + "rewards/rejected": -5.461773872375488, + "step": 280 + }, + { + "epoch": 0.5941195903534853, + "grad_norm": 6.818525314331055, + "learning_rate": 4.225146725358758e-07, + "logits/chosen": -0.8864554166793823, + "logits/rejected": -0.8813320398330688, + "logps/chosen": -2.4233975410461426, + "logps/rejected": -2.8188178539276123, + "loss": 1.2281, + "rewards/accuracies": 0.6640625, + "rewards/chosen": -4.846795082092285, + "rewards/margins": 0.7908411622047424, + "rewards/rejected": -5.637635707855225, + "step": 281 + }, + { + "epoch": 0.5962338949454906, + "grad_norm": 2.529154062271118, + "learning_rate": 4.1885688095577e-07, + "logits/chosen": -0.8420325517654419, + "logits/rejected": -0.8822402954101562, + "logps/chosen": -2.626488447189331, + "logps/rejected": -3.1887192726135254, + "loss": 1.0561, + "rewards/accuracies": 0.6953125, + "rewards/chosen": -5.252976894378662, + "rewards/margins": 1.1244611740112305, + "rewards/rejected": -6.377438545227051, + "step": 282 + }, + { + "epoch": 0.5983481995374959, + "grad_norm": 3.0739686489105225, + "learning_rate": 4.152035440682873e-07, + "logits/chosen": -0.8550993204116821, + "logits/rejected": -0.8528580665588379, + "logps/chosen": -2.6387887001037598, + "logps/rejected": -2.9952192306518555, + "loss": 1.3409, + "rewards/accuracies": 0.6015625, + "rewards/chosen": -5.2775774002075195, + "rewards/margins": 0.7128612399101257, + "rewards/rejected": -5.990438461303711, + "step": 283 + }, + { + "epoch": 0.6004625041295012, + "grad_norm": 3.6649062633514404, + "learning_rate": 4.1155486243871363e-07, + "logits/chosen": -0.8643282651901245, + "logits/rejected": -0.9175342321395874, + "logps/chosen": -2.929072618484497, + "logps/rejected": -3.105940580368042, + "loss": 1.5121, + "rewards/accuracies": 0.578125, + "rewards/chosen": -5.858145236968994, + "rewards/margins": 0.3537355065345764, + "rewards/rejected": -6.211881160736084, + "step": 284 + }, + { + "epoch": 0.6025768087215064, + "grad_norm": 2.5071723461151123, + "learning_rate": 4.0791103637676486e-07, + "logits/chosen": -0.8368631601333618, + "logits/rejected": -0.819808304309845, + "logps/chosen": -3.0672600269317627, + "logps/rejected": -3.4685003757476807, + "loss": 1.3236, + "rewards/accuracies": 0.65625, + "rewards/chosen": -6.134520053863525, + "rewards/margins": 0.8024805784225464, + "rewards/rejected": -6.937000751495361, + "step": 285 + }, + { + "epoch": 0.6046911133135118, + "grad_norm": 8.780280113220215, + "learning_rate": 4.042722659255906e-07, + "logits/chosen": -0.8249569535255432, + "logits/rejected": -0.8442113995552063, + "logps/chosen": -3.3199872970581055, + "logps/rejected": -3.7276291847229004, + "loss": 1.322, + "rewards/accuracies": 0.6796875, + "rewards/chosen": -6.639974594116211, + "rewards/margins": 0.8152831792831421, + "rewards/rejected": -7.455258369445801, + "step": 286 + }, + { + "epoch": 0.606805417905517, + "grad_norm": 3.4388678073883057, + "learning_rate": 4.006387508507914e-07, + "logits/chosen": -0.7224047780036926, + "logits/rejected": -0.7616450786590576, + "logps/chosen": -2.9411330223083496, + "logps/rejected": -3.32680082321167, + "loss": 1.2868, + "rewards/accuracies": 0.6640625, + "rewards/chosen": -5.882266044616699, + "rewards/margins": 0.7713361978530884, + "rewards/rejected": -6.65360164642334, + "step": 287 + }, + { + "epoch": 0.6089197224975224, + "grad_norm": 5.095273971557617, + "learning_rate": 3.970106906294509e-07, + "logits/chosen": -0.7394692897796631, + "logits/rejected": -0.7316830158233643, + "logps/chosen": -2.9902045726776123, + "logps/rejected": -3.469916820526123, + "loss": 1.1694, + "rewards/accuracies": 0.6953125, + "rewards/chosen": -5.980409145355225, + "rewards/margins": 0.9594244360923767, + "rewards/rejected": -6.939833641052246, + "step": 288 + }, + { + "epoch": 0.6110340270895276, + "grad_norm": 2.1398613452911377, + "learning_rate": 3.933882844391866e-07, + "logits/chosen": -0.8181312084197998, + "logits/rejected": -0.833306610584259, + "logps/chosen": -3.0137529373168945, + "logps/rejected": -3.4241840839385986, + "loss": 1.2453, + "rewards/accuracies": 0.609375, + "rewards/chosen": -6.027505874633789, + "rewards/margins": 0.8208625316619873, + "rewards/rejected": -6.848368167877197, + "step": 289 + }, + { + "epoch": 0.6131483316815328, + "grad_norm": 4.185284614562988, + "learning_rate": 3.89771731147214e-07, + "logits/chosen": -0.7805104851722717, + "logits/rejected": -0.8086984753608704, + "logps/chosen": -2.984957218170166, + "logps/rejected": -3.430112361907959, + "loss": 1.2671, + "rewards/accuracies": 0.5859375, + "rewards/chosen": -5.969914436340332, + "rewards/margins": 0.890310525894165, + "rewards/rejected": -6.860224723815918, + "step": 290 + }, + { + "epoch": 0.6152626362735382, + "grad_norm": 7.104829788208008, + "learning_rate": 3.861612292994292e-07, + "logits/chosen": -0.7788286209106445, + "logits/rejected": -0.8027424216270447, + "logps/chosen": -2.896563768386841, + "logps/rejected": -3.1082046031951904, + "loss": 1.4853, + "rewards/accuracies": 0.578125, + "rewards/chosen": -5.793127536773682, + "rewards/margins": 0.42328107357025146, + "rewards/rejected": -6.216409206390381, + "step": 291 + }, + { + "epoch": 0.6173769408655434, + "grad_norm": 3.795579433441162, + "learning_rate": 3.825569771095082e-07, + "logits/chosen": -0.8044757843017578, + "logits/rejected": -0.7828265428543091, + "logps/chosen": -2.8059256076812744, + "logps/rejected": -3.3121094703674316, + "loss": 1.1299, + "rewards/accuracies": 0.6875, + "rewards/chosen": -5.611851215362549, + "rewards/margins": 1.0123679637908936, + "rewards/rejected": -6.624218940734863, + "step": 292 + }, + { + "epoch": 0.6194912454575487, + "grad_norm": 4.486142158508301, + "learning_rate": 3.7895917244802655e-07, + "logits/chosen": -0.7511788606643677, + "logits/rejected": -0.7885503768920898, + "logps/chosen": -2.927251100540161, + "logps/rejected": -3.1605303287506104, + "loss": 1.426, + "rewards/accuracies": 0.6015625, + "rewards/chosen": -5.854502201080322, + "rewards/margins": 0.466558575630188, + "rewards/rejected": -6.321060657501221, + "step": 293 + }, + { + "epoch": 0.621605550049554, + "grad_norm": 3.3942787647247314, + "learning_rate": 3.753680128315952e-07, + "logits/chosen": -0.8230300545692444, + "logits/rejected": -0.8042524456977844, + "logps/chosen": -2.524353504180908, + "logps/rejected": -2.8687357902526855, + "loss": 1.2653, + "rewards/accuracies": 0.6640625, + "rewards/chosen": -5.048707008361816, + "rewards/margins": 0.6887640953063965, + "rewards/rejected": -5.737471580505371, + "step": 294 + }, + { + "epoch": 0.6237198546415593, + "grad_norm": 4.326812744140625, + "learning_rate": 3.717836954120178e-07, + "logits/chosen": -0.7763381004333496, + "logits/rejected": -0.7852378487586975, + "logps/chosen": -2.4861948490142822, + "logps/rejected": -2.8822267055511475, + "loss": 1.124, + "rewards/accuracies": 0.75, + "rewards/chosen": -4.9723896980285645, + "rewards/margins": 0.7920635938644409, + "rewards/rejected": -5.764453411102295, + "step": 295 + }, + { + "epoch": 0.6258341592335646, + "grad_norm": 3.886293649673462, + "learning_rate": 3.6820641696546627e-07, + "logits/chosen": -0.8350138664245605, + "logits/rejected": -0.8594292998313904, + "logps/chosen": -2.1301493644714355, + "logps/rejected": -2.3678014278411865, + "loss": 1.3532, + "rewards/accuracies": 0.5859375, + "rewards/chosen": -4.260298728942871, + "rewards/margins": 0.4753049314022064, + "rewards/rejected": -4.735602855682373, + "step": 296 + }, + { + "epoch": 0.6279484638255699, + "grad_norm": 1.9318888187408447, + "learning_rate": 3.6463637388167875e-07, + "logits/chosen": -0.812870979309082, + "logits/rejected": -0.8393633961677551, + "logps/chosen": -2.0607728958129883, + "logps/rejected": -2.4457521438598633, + "loss": 1.2317, + "rewards/accuracies": 0.609375, + "rewards/chosen": -4.121545791625977, + "rewards/margins": 0.76995849609375, + "rewards/rejected": -4.891504287719727, + "step": 297 + }, + { + "epoch": 0.6300627684175751, + "grad_norm": 2.731139898300171, + "learning_rate": 3.610737621531781e-07, + "logits/chosen": -0.7860711216926575, + "logits/rejected": -0.8006534576416016, + "logps/chosen": -1.9324530363082886, + "logps/rejected": -2.2838711738586426, + "loss": 1.2986, + "rewards/accuracies": 0.65625, + "rewards/chosen": -3.864906072616577, + "rewards/margins": 0.7028359174728394, + "rewards/rejected": -4.567742347717285, + "step": 298 + }, + { + "epoch": 0.6321770730095805, + "grad_norm": 3.118441581726074, + "learning_rate": 3.575187773645112e-07, + "logits/chosen": -0.6946629285812378, + "logits/rejected": -0.6832380294799805, + "logps/chosen": -2.2569775581359863, + "logps/rejected": -2.6153128147125244, + "loss": 1.2166, + "rewards/accuracies": 0.6171875, + "rewards/chosen": -4.513955116271973, + "rewards/margins": 0.7166703343391418, + "rewards/rejected": -5.230625629425049, + "step": 299 + }, + { + "epoch": 0.6342913776015857, + "grad_norm": 4.998100757598877, + "learning_rate": 3.5397161468151214e-07, + "logits/chosen": -0.7972643375396729, + "logits/rejected": -0.7864660620689392, + "logps/chosen": -2.227022886276245, + "logps/rejected": -2.57175350189209, + "loss": 1.2796, + "rewards/accuracies": 0.6328125, + "rewards/chosen": -4.45404577255249, + "rewards/margins": 0.6894608736038208, + "rewards/rejected": -5.14350700378418, + "step": 300 + }, + { + "epoch": 0.6364056821935911, + "grad_norm": 6.259451866149902, + "learning_rate": 3.5043246884058777e-07, + "logits/chosen": -0.6282143592834473, + "logits/rejected": -0.6314865350723267, + "logps/chosen": -2.4372308254241943, + "logps/rejected": -2.8582205772399902, + "loss": 1.1592, + "rewards/accuracies": 0.625, + "rewards/chosen": -4.874461650848389, + "rewards/margins": 0.8419792056083679, + "rewards/rejected": -5.7164411544799805, + "step": 301 + }, + { + "epoch": 0.6385199867855963, + "grad_norm": 2.577531337738037, + "learning_rate": 3.4690153413802653e-07, + "logits/chosen": -0.658220648765564, + "logits/rejected": -0.6330516934394836, + "logps/chosen": -2.6647050380706787, + "logps/rejected": -3.1917996406555176, + "loss": 1.2609, + "rewards/accuracies": 0.671875, + "rewards/chosen": -5.329410076141357, + "rewards/margins": 1.0541892051696777, + "rewards/rejected": -6.383599281311035, + "step": 302 + }, + { + "epoch": 0.6406342913776016, + "grad_norm": 4.733935356140137, + "learning_rate": 3.4337900441933227e-07, + "logits/chosen": -0.5048555731773376, + "logits/rejected": -0.45112305879592896, + "logps/chosen": -2.5193920135498047, + "logps/rejected": -3.1279971599578857, + "loss": 1.0648, + "rewards/accuracies": 0.765625, + "rewards/chosen": -5.038784027099609, + "rewards/margins": 1.2172104120254517, + "rewards/rejected": -6.2559943199157715, + "step": 303 + }, + { + "epoch": 0.6427485959696069, + "grad_norm": 5.54962158203125, + "learning_rate": 3.3986507306858125e-07, + "logits/chosen": -0.5305406451225281, + "logits/rejected": -0.5246613025665283, + "logps/chosen": -2.8851962089538574, + "logps/rejected": -3.248018264770508, + "loss": 1.4329, + "rewards/accuracies": 0.625, + "rewards/chosen": -5.770392417907715, + "rewards/margins": 0.7256444692611694, + "rewards/rejected": -6.496036529541016, + "step": 304 + }, + { + "epoch": 0.6448629005616121, + "grad_norm": 2.827944278717041, + "learning_rate": 3.363599329978066e-07, + "logits/chosen": -0.4795135259628296, + "logits/rejected": -0.4911767244338989, + "logps/chosen": -3.0268373489379883, + "logps/rejected": -3.4411511421203613, + "loss": 1.4083, + "rewards/accuracies": 0.65625, + "rewards/chosen": -6.053674697875977, + "rewards/margins": 0.8286278247833252, + "rewards/rejected": -6.882302284240723, + "step": 305 + }, + { + "epoch": 0.6469772051536175, + "grad_norm": 5.35672664642334, + "learning_rate": 3.328637766364075e-07, + "logits/chosen": -0.4823904037475586, + "logits/rejected": -0.48555058240890503, + "logps/chosen": -2.990793466567993, + "logps/rejected": -3.529240846633911, + "loss": 1.1417, + "rewards/accuracies": 0.6796875, + "rewards/chosen": -5.981586933135986, + "rewards/margins": 1.0768945217132568, + "rewards/rejected": -7.058481693267822, + "step": 306 + }, + { + "epoch": 0.6490915097456227, + "grad_norm": 2.8072359561920166, + "learning_rate": 3.2937679592058396e-07, + "logits/chosen": -0.4903571605682373, + "logits/rejected": -0.46411609649658203, + "logps/chosen": -2.8665530681610107, + "logps/rejected": -3.542123556137085, + "loss": 1.2485, + "rewards/accuracies": 0.6640625, + "rewards/chosen": -5.7331061363220215, + "rewards/margins": 1.3511409759521484, + "rewards/rejected": -7.08424711227417, + "step": 307 + }, + { + "epoch": 0.651205814337628, + "grad_norm": 6.341434478759766, + "learning_rate": 3.2589918228280066e-07, + "logits/chosen": -0.4496378004550934, + "logits/rejected": -0.35389459133148193, + "logps/chosen": -2.8208916187286377, + "logps/rejected": -3.326601505279541, + "loss": 1.3089, + "rewards/accuracies": 0.6171875, + "rewards/chosen": -5.641783237457275, + "rewards/margins": 1.011419653892517, + "rewards/rejected": -6.653203010559082, + "step": 308 + }, + { + "epoch": 0.6533201189296333, + "grad_norm": 2.5416784286499023, + "learning_rate": 3.2243112664127723e-07, + "logits/chosen": -0.44504135847091675, + "logits/rejected": -0.42088568210601807, + "logps/chosen": -2.7710533142089844, + "logps/rejected": -3.4406185150146484, + "loss": 1.2213, + "rewards/accuracies": 0.6640625, + "rewards/chosen": -5.542106628417969, + "rewards/margins": 1.3391309976577759, + "rewards/rejected": -6.881237030029297, + "step": 309 + }, + { + "epoch": 0.6554344235216386, + "grad_norm": 4.573229789733887, + "learning_rate": 3.189728193895069e-07, + "logits/chosen": -0.31100764870643616, + "logits/rejected": -0.32552966475486755, + "logps/chosen": -3.099289655685425, + "logps/rejected": -3.5152204036712646, + "loss": 1.3571, + "rewards/accuracies": 0.6015625, + "rewards/chosen": -6.19857931137085, + "rewards/margins": 0.8318620324134827, + "rewards/rejected": -7.030440807342529, + "step": 310 + }, + { + "epoch": 0.6575487281136438, + "grad_norm": 3.7587928771972656, + "learning_rate": 3.155244503858041e-07, + "logits/chosen": -0.4225979447364807, + "logits/rejected": -0.43882372975349426, + "logps/chosen": -2.9082608222961426, + "logps/rejected": -3.2239482402801514, + "loss": 1.3415, + "rewards/accuracies": 0.6953125, + "rewards/chosen": -5.816521644592285, + "rewards/margins": 0.6313749551773071, + "rewards/rejected": -6.447896480560303, + "step": 311 + }, + { + "epoch": 0.6596630327056492, + "grad_norm": 5.79728889465332, + "learning_rate": 3.12086208942881e-07, + "logits/chosen": -0.48076939582824707, + "logits/rejected": -0.41990721225738525, + "logps/chosen": -2.7089650630950928, + "logps/rejected": -3.29990291595459, + "loss": 1.1423, + "rewards/accuracies": 0.6640625, + "rewards/chosen": -5.4179301261901855, + "rewards/margins": 1.181876540184021, + "rewards/rejected": -6.59980583190918, + "step": 312 + }, + { + "epoch": 0.6617773372976544, + "grad_norm": 7.405224800109863, + "learning_rate": 3.086582838174551e-07, + "logits/chosen": -0.48003631830215454, + "logits/rejected": -0.40571871399879456, + "logps/chosen": -2.53741455078125, + "logps/rejected": -3.0145747661590576, + "loss": 1.3247, + "rewards/accuracies": 0.609375, + "rewards/chosen": -5.0748291015625, + "rewards/margins": 0.9543203115463257, + "rewards/rejected": -6.029149532318115, + "step": 313 + }, + { + "epoch": 0.6638916418896598, + "grad_norm": 6.371465682983398, + "learning_rate": 3.052408631998863e-07, + "logits/chosen": -0.42537638545036316, + "logits/rejected": -0.39384835958480835, + "logps/chosen": -3.006593942642212, + "logps/rejected": -3.4665465354919434, + "loss": 1.2648, + "rewards/accuracies": 0.6328125, + "rewards/chosen": -6.013187885284424, + "rewards/margins": 0.919904887676239, + "rewards/rejected": -6.933093070983887, + "step": 314 + }, + { + "epoch": 0.666005946481665, + "grad_norm": 4.65411376953125, + "learning_rate": 3.018341347038453e-07, + "logits/chosen": -0.38848310708999634, + "logits/rejected": -0.3435167670249939, + "logps/chosen": -2.9562084674835205, + "logps/rejected": -3.5491316318511963, + "loss": 1.1353, + "rewards/accuracies": 0.6796875, + "rewards/chosen": -5.912416934967041, + "rewards/margins": 1.1858452558517456, + "rewards/rejected": -7.098263263702393, + "step": 315 + }, + { + "epoch": 0.6681202510736703, + "grad_norm": 5.089771747589111, + "learning_rate": 2.9843828535601397e-07, + "logits/chosen": -0.3452882170677185, + "logits/rejected": -0.29303884506225586, + "logps/chosen": -2.5367987155914307, + "logps/rejected": -3.172724723815918, + "loss": 1.2002, + "rewards/accuracies": 0.6875, + "rewards/chosen": -5.073597431182861, + "rewards/margins": 1.2718524932861328, + "rewards/rejected": -6.345449447631836, + "step": 316 + }, + { + "epoch": 0.6702345556656756, + "grad_norm": 4.480255603790283, + "learning_rate": 2.9505350158581697e-07, + "logits/chosen": -0.47401517629623413, + "logits/rejected": -0.45950815081596375, + "logps/chosen": -2.45076322555542, + "logps/rejected": -2.998079299926758, + "loss": 1.2545, + "rewards/accuracies": 0.6171875, + "rewards/chosen": -4.90152645111084, + "rewards/margins": 1.0946320295333862, + "rewards/rejected": -5.996158599853516, + "step": 317 + }, + { + "epoch": 0.6723488602576809, + "grad_norm": 3.6318399906158447, + "learning_rate": 2.916799692151884e-07, + "logits/chosen": -0.20774951577186584, + "logits/rejected": -0.21114808320999146, + "logps/chosen": -2.8932981491088867, + "logps/rejected": -3.613022565841675, + "loss": 1.1187, + "rewards/accuracies": 0.6953125, + "rewards/chosen": -5.786596298217773, + "rewards/margins": 1.4394491910934448, + "rewards/rejected": -7.22604513168335, + "step": 318 + }, + { + "epoch": 0.6744631648496862, + "grad_norm": 6.601771831512451, + "learning_rate": 2.883178734483692e-07, + "logits/chosen": -0.3821495473384857, + "logits/rejected": -0.35181915760040283, + "logps/chosen": -2.5047662258148193, + "logps/rejected": -3.074918270111084, + "loss": 1.1545, + "rewards/accuracies": 0.71875, + "rewards/chosen": -5.009532451629639, + "rewards/margins": 1.1403042078018188, + "rewards/rejected": -6.149836540222168, + "step": 319 + }, + { + "epoch": 0.6765774694416914, + "grad_norm": 3.077775716781616, + "learning_rate": 2.849673988617399e-07, + "logits/chosen": -0.4517952799797058, + "logits/rejected": -0.3880998194217682, + "logps/chosen": -2.5404443740844727, + "logps/rejected": -3.007855176925659, + "loss": 1.2441, + "rewards/accuracies": 0.6484375, + "rewards/chosen": -5.080888748168945, + "rewards/margins": 0.9348208904266357, + "rewards/rejected": -6.015710353851318, + "step": 320 + }, + { + "epoch": 0.6786917740336967, + "grad_norm": 4.130971908569336, + "learning_rate": 2.8162872939368674e-07, + "logits/chosen": -0.3455219566822052, + "logits/rejected": -0.3199109137058258, + "logps/chosen": -2.5115320682525635, + "logps/rejected": -3.0809438228607178, + "loss": 1.1814, + "rewards/accuracies": 0.71875, + "rewards/chosen": -5.023064136505127, + "rewards/margins": 1.1388237476348877, + "rewards/rejected": -6.1618876457214355, + "step": 321 + }, + { + "epoch": 0.680806078625702, + "grad_norm": 6.414750099182129, + "learning_rate": 2.783020483345057e-07, + "logits/chosen": -0.500693142414093, + "logits/rejected": -0.43053722381591797, + "logps/chosen": -2.627499580383301, + "logps/rejected": -3.176882266998291, + "loss": 1.2207, + "rewards/accuracies": 0.6015625, + "rewards/chosen": -5.254999160766602, + "rewards/margins": 1.0987658500671387, + "rewards/rejected": -6.353764533996582, + "step": 322 + }, + { + "epoch": 0.6829203832177073, + "grad_norm": 3.8955185413360596, + "learning_rate": 2.749875383163377e-07, + "logits/chosen": -0.3386150896549225, + "logits/rejected": -0.3456903100013733, + "logps/chosen": -2.5545601844787598, + "logps/rejected": -3.0574111938476562, + "loss": 1.2667, + "rewards/accuracies": 0.6796875, + "rewards/chosen": -5.1091203689575195, + "rewards/margins": 1.0057018995285034, + "rewards/rejected": -6.1148223876953125, + "step": 323 + }, + { + "epoch": 0.6850346878097126, + "grad_norm": 4.244959831237793, + "learning_rate": 2.7168538130314345e-07, + "logits/chosen": -0.4657687246799469, + "logits/rejected": -0.41878795623779297, + "logps/chosen": -2.3406989574432373, + "logps/rejected": -2.74613094329834, + "loss": 1.2982, + "rewards/accuracies": 0.640625, + "rewards/chosen": -4.681397914886475, + "rewards/margins": 0.8108637928962708, + "rewards/rejected": -5.49226188659668, + "step": 324 + }, + { + "epoch": 0.6871489924017179, + "grad_norm": 8.914139747619629, + "learning_rate": 2.683957585807136e-07, + "logits/chosen": -0.42120760679244995, + "logits/rejected": -0.34997111558914185, + "logps/chosen": -2.4362924098968506, + "logps/rejected": -2.8844237327575684, + "loss": 1.3185, + "rewards/accuracies": 0.6484375, + "rewards/chosen": -4.872584819793701, + "rewards/margins": 0.8962627649307251, + "rewards/rejected": -5.768847465515137, + "step": 325 + }, + { + "epoch": 0.6892632969937231, + "grad_norm": 2.8318073749542236, + "learning_rate": 2.651188507467161e-07, + "logits/chosen": -0.4435175657272339, + "logits/rejected": -0.43688836693763733, + "logps/chosen": -2.316673994064331, + "logps/rejected": -2.6802306175231934, + "loss": 1.2727, + "rewards/accuracies": 0.65625, + "rewards/chosen": -4.633347988128662, + "rewards/margins": 0.7271134853363037, + "rewards/rejected": -5.360461235046387, + "step": 326 + }, + { + "epoch": 0.6913776015857285, + "grad_norm": 9.15845012664795, + "learning_rate": 2.618548377007817e-07, + "logits/chosen": -0.4659804105758667, + "logits/rejected": -0.43525823950767517, + "logps/chosen": -2.3177073001861572, + "logps/rejected": -2.674837350845337, + "loss": 1.3204, + "rewards/accuracies": 0.65625, + "rewards/chosen": -4.6354146003723145, + "rewards/margins": 0.7142605781555176, + "rewards/rejected": -5.349674701690674, + "step": 327 + }, + { + "epoch": 0.6934919061777337, + "grad_norm": 8.41653060913086, + "learning_rate": 2.5860389863462763e-07, + "logits/chosen": -0.42244386672973633, + "logits/rejected": -0.3488731384277344, + "logps/chosen": -2.3063669204711914, + "logps/rejected": -2.8124496936798096, + "loss": 1.2621, + "rewards/accuracies": 0.671875, + "rewards/chosen": -4.612733840942383, + "rewards/margins": 1.0121653079986572, + "rewards/rejected": -5.624899387359619, + "step": 328 + }, + { + "epoch": 0.695606210769739, + "grad_norm": 8.558746337890625, + "learning_rate": 2.5536621202221986e-07, + "logits/chosen": -0.4081762433052063, + "logits/rejected": -0.3913821578025818, + "logps/chosen": -2.331026554107666, + "logps/rejected": -2.799482583999634, + "loss": 1.2435, + "rewards/accuracies": 0.59375, + "rewards/chosen": -4.662053108215332, + "rewards/margins": 0.9369123578071594, + "rewards/rejected": -5.598965167999268, + "step": 329 + }, + { + "epoch": 0.6977205153617443, + "grad_norm": 7.550137519836426, + "learning_rate": 2.521419556099754e-07, + "logits/chosen": -0.5334538221359253, + "logits/rejected": -0.5046267509460449, + "logps/chosen": -2.3662197589874268, + "logps/rejected": -2.8178446292877197, + "loss": 1.2172, + "rewards/accuracies": 0.6796875, + "rewards/chosen": -4.7324395179748535, + "rewards/margins": 0.9032500386238098, + "rewards/rejected": -5.6356892585754395, + "step": 330 + }, + { + "epoch": 0.6998348199537496, + "grad_norm": 4.939478397369385, + "learning_rate": 2.4893130640700364e-07, + "logits/chosen": -0.5103824138641357, + "logits/rejected": -0.49076637625694275, + "logps/chosen": -2.0302557945251465, + "logps/rejected": -2.4443471431732178, + "loss": 1.1939, + "rewards/accuracies": 0.65625, + "rewards/chosen": -4.060511589050293, + "rewards/margins": 0.8281831741333008, + "rewards/rejected": -4.8886942863464355, + "step": 331 + }, + { + "epoch": 0.7019491245457549, + "grad_norm": 5.584677219390869, + "learning_rate": 2.4573444067538985e-07, + "logits/chosen": -0.46035417914390564, + "logits/rejected": -0.4546043574810028, + "logps/chosen": -2.1907548904418945, + "logps/rejected": -2.4913454055786133, + "loss": 1.4253, + "rewards/accuracies": 0.6015625, + "rewards/chosen": -4.381509780883789, + "rewards/margins": 0.6011807322502136, + "rewards/rejected": -4.982690811157227, + "step": 332 + }, + { + "epoch": 0.7040634291377602, + "grad_norm": 3.398441791534424, + "learning_rate": 2.425515339205165e-07, + "logits/chosen": -0.5569466352462769, + "logits/rejected": -0.5756793022155762, + "logps/chosen": -2.037411689758301, + "logps/rejected": -2.3700244426727295, + "loss": 1.3425, + "rewards/accuracies": 0.5703125, + "rewards/chosen": -4.074823379516602, + "rewards/margins": 0.6652255654335022, + "rewards/rejected": -4.740048885345459, + "step": 333 + }, + { + "epoch": 0.7061777337297654, + "grad_norm": 8.54529094696045, + "learning_rate": 2.3938276088143e-07, + "logits/chosen": -0.5746757388114929, + "logits/rejected": -0.5874296426773071, + "logps/chosen": -2.1479601860046387, + "logps/rejected": -2.584625244140625, + "loss": 1.2366, + "rewards/accuracies": 0.71875, + "rewards/chosen": -4.295920372009277, + "rewards/margins": 0.8733301758766174, + "rewards/rejected": -5.16925048828125, + "step": 334 + }, + { + "epoch": 0.7082920383217707, + "grad_norm": 5.141815662384033, + "learning_rate": 2.362282955212473e-07, + "logits/chosen": -0.6492913961410522, + "logits/rejected": -0.5812432765960693, + "logps/chosen": -1.9753435850143433, + "logps/rejected": -2.340383768081665, + "loss": 1.2197, + "rewards/accuracies": 0.640625, + "rewards/chosen": -3.9506871700286865, + "rewards/margins": 0.7300805449485779, + "rewards/rejected": -4.68076753616333, + "step": 335 + }, + { + "epoch": 0.710406342913776, + "grad_norm": 5.991698265075684, + "learning_rate": 2.3308831101760483e-07, + "logits/chosen": -0.6887751221656799, + "logits/rejected": -0.6923843622207642, + "logps/chosen": -1.577715277671814, + "logps/rejected": -1.861379623413086, + "loss": 1.2608, + "rewards/accuracies": 0.640625, + "rewards/chosen": -3.155430555343628, + "rewards/margins": 0.5673283338546753, + "rewards/rejected": -3.722759246826172, + "step": 336 + }, + { + "epoch": 0.7125206475057813, + "grad_norm": 1.5719850063323975, + "learning_rate": 2.2996297975315097e-07, + "logits/chosen": -0.6095813512802124, + "logits/rejected": -0.5842909216880798, + "logps/chosen": -1.6973541975021362, + "logps/rejected": -2.1261086463928223, + "loss": 1.2424, + "rewards/accuracies": 0.609375, + "rewards/chosen": -3.3947083950042725, + "rewards/margins": 0.857509195804596, + "rewards/rejected": -4.2522172927856445, + "step": 337 + }, + { + "epoch": 0.7146349520977866, + "grad_norm": 4.785243511199951, + "learning_rate": 2.2685247330608414e-07, + "logits/chosen": -0.7062411308288574, + "logits/rejected": -0.6849475502967834, + "logps/chosen": -1.6659798622131348, + "logps/rejected": -1.980202555656433, + "loss": 1.2512, + "rewards/accuracies": 0.6171875, + "rewards/chosen": -3.3319597244262695, + "rewards/margins": 0.6284454464912415, + "rewards/rejected": -3.960405111312866, + "step": 338 + }, + { + "epoch": 0.7167492566897918, + "grad_norm": 4.3183674812316895, + "learning_rate": 2.2375696244073123e-07, + "logits/chosen": -0.6655697822570801, + "logits/rejected": -0.6642571687698364, + "logps/chosen": -1.615012764930725, + "logps/rejected": -1.9022549390792847, + "loss": 1.398, + "rewards/accuracies": 0.625, + "rewards/chosen": -3.23002552986145, + "rewards/margins": 0.5744845867156982, + "rewards/rejected": -3.8045098781585693, + "step": 339 + }, + { + "epoch": 0.7188635612817972, + "grad_norm": 3.458740472793579, + "learning_rate": 2.2067661709817382e-07, + "logits/chosen": -0.6138105988502502, + "logits/rejected": -0.6241220235824585, + "logps/chosen": -1.5244299173355103, + "logps/rejected": -1.8252849578857422, + "loss": 1.2257, + "rewards/accuracies": 0.5859375, + "rewards/chosen": -3.0488598346710205, + "rewards/margins": 0.6017097234725952, + "rewards/rejected": -3.6505699157714844, + "step": 340 + }, + { + "epoch": 0.7209778658738024, + "grad_norm": 3.3990859985351562, + "learning_rate": 2.1761160638691838e-07, + "logits/chosen": -0.596839964389801, + "logits/rejected": -0.5929630398750305, + "logps/chosen": -1.4333155155181885, + "logps/rejected": -1.820554494857788, + "loss": 1.1124, + "rewards/accuracies": 0.6953125, + "rewards/chosen": -2.866631031036377, + "rewards/margins": 0.7744779586791992, + "rewards/rejected": -3.641108989715576, + "step": 341 + }, + { + "epoch": 0.7230921704658078, + "grad_norm": 2.742016315460205, + "learning_rate": 2.1456209857361246e-07, + "logits/chosen": -0.6483213901519775, + "logits/rejected": -0.6418218612670898, + "logps/chosen": -1.4174959659576416, + "logps/rejected": -1.831233263015747, + "loss": 1.1372, + "rewards/accuracies": 0.703125, + "rewards/chosen": -2.834991931915283, + "rewards/margins": 0.8274745941162109, + "rewards/rejected": -3.662466526031494, + "step": 342 + }, + { + "epoch": 0.725206475057813, + "grad_norm": 2.5489015579223633, + "learning_rate": 2.1152826107380651e-07, + "logits/chosen": -0.599895179271698, + "logits/rejected": -0.6154446005821228, + "logps/chosen": -1.4996072053909302, + "logps/rejected": -1.7961615324020386, + "loss": 1.2288, + "rewards/accuracies": 0.6640625, + "rewards/chosen": -2.9992144107818604, + "rewards/margins": 0.5931087732315063, + "rewards/rejected": -3.592323064804077, + "step": 343 + }, + { + "epoch": 0.7273207796498183, + "grad_norm": 2.8836190700531006, + "learning_rate": 2.0851026044276405e-07, + "logits/chosen": -0.7359989285469055, + "logits/rejected": -0.7111036777496338, + "logps/chosen": -1.32615065574646, + "logps/rejected": -1.6067696809768677, + "loss": 1.2088, + "rewards/accuracies": 0.65625, + "rewards/chosen": -2.65230131149292, + "rewards/margins": 0.5612384080886841, + "rewards/rejected": -3.2135393619537354, + "step": 344 + }, + { + "epoch": 0.7294350842418236, + "grad_norm": 3.1838135719299316, + "learning_rate": 2.0550826236631596e-07, + "logits/chosen": -0.6709272265434265, + "logits/rejected": -0.6708023548126221, + "logps/chosen": -1.2859303951263428, + "logps/rejected": -1.6929675340652466, + "loss": 1.1446, + "rewards/accuracies": 0.6875, + "rewards/chosen": -2.5718607902526855, + "rewards/margins": 0.8140743374824524, + "rewards/rejected": -3.385935068130493, + "step": 345 + }, + { + "epoch": 0.7315493888338289, + "grad_norm": 2.4209675788879395, + "learning_rate": 2.025224316517663e-07, + "logits/chosen": -0.7540403604507446, + "logits/rejected": -0.7601196765899658, + "logps/chosen": -1.3634543418884277, + "logps/rejected": -1.6112797260284424, + "loss": 1.2561, + "rewards/accuracies": 0.625, + "rewards/chosen": -2.7269086837768555, + "rewards/margins": 0.4956510066986084, + "rewards/rejected": -3.2225594520568848, + "step": 346 + }, + { + "epoch": 0.7336636934258342, + "grad_norm": 5.405437469482422, + "learning_rate": 1.9955293221884402e-07, + "logits/chosen": -0.7241419553756714, + "logits/rejected": -0.7224253416061401, + "logps/chosen": -1.2650585174560547, + "logps/rejected": -1.639666199684143, + "loss": 1.1565, + "rewards/accuracies": 0.6875, + "rewards/chosen": -2.5301170349121094, + "rewards/margins": 0.7492151856422424, + "rewards/rejected": -3.279332399368286, + "step": 347 + }, + { + "epoch": 0.7357779980178395, + "grad_norm": 1.5863631963729858, + "learning_rate": 1.9659992709070344e-07, + "logits/chosen": -0.7479431629180908, + "logits/rejected": -0.7219806909561157, + "logps/chosen": -1.294840931892395, + "logps/rejected": -1.6082017421722412, + "loss": 1.1693, + "rewards/accuracies": 0.640625, + "rewards/chosen": -2.58968186378479, + "rewards/margins": 0.6267215013504028, + "rewards/rejected": -3.2164034843444824, + "step": 348 + }, + { + "epoch": 0.7378923026098447, + "grad_norm": 1.7051454782485962, + "learning_rate": 1.936635783849742e-07, + "logits/chosen": -0.6940132975578308, + "logits/rejected": -0.7377297878265381, + "logps/chosen": -1.1897408962249756, + "logps/rejected": -1.631073236465454, + "loss": 1.1069, + "rewards/accuracies": 0.6953125, + "rewards/chosen": -2.379481792449951, + "rewards/margins": 0.8826643228530884, + "rewards/rejected": -3.262146472930908, + "step": 349 + }, + { + "epoch": 0.74000660720185, + "grad_norm": 2.704514980316162, + "learning_rate": 1.907440473048626e-07, + "logits/chosen": -0.6926394104957581, + "logits/rejected": -0.7064180374145508, + "logps/chosen": -1.1691362857818604, + "logps/rejected": -1.511006236076355, + "loss": 1.1541, + "rewards/accuracies": 0.6640625, + "rewards/chosen": -2.3382725715637207, + "rewards/margins": 0.6837398409843445, + "rewards/rejected": -3.02201247215271, + "step": 350 + }, + { + "epoch": 0.7421209117938553, + "grad_norm": 2.3685505390167236, + "learning_rate": 1.8784149413030004e-07, + "logits/chosen": -0.7785338759422302, + "logits/rejected": -0.7802280187606812, + "logps/chosen": -1.267012119293213, + "logps/rejected": -1.5235991477966309, + "loss": 1.177, + "rewards/accuracies": 0.65625, + "rewards/chosen": -2.534024238586426, + "rewards/margins": 0.5131738781929016, + "rewards/rejected": -3.0471982955932617, + "step": 351 + }, + { + "epoch": 0.7442352163858605, + "grad_norm": 2.8642280101776123, + "learning_rate": 1.849560782091445e-07, + "logits/chosen": -0.8269493579864502, + "logits/rejected": -0.8431333899497986, + "logps/chosen": -1.228893518447876, + "logps/rejected": -1.5784943103790283, + "loss": 1.1764, + "rewards/accuracies": 0.640625, + "rewards/chosen": -2.457787036895752, + "rewards/margins": 0.6992017030715942, + "rewards/rejected": -3.1569886207580566, + "step": 352 + }, + { + "epoch": 0.7463495209778659, + "grad_norm": 4.742166996002197, + "learning_rate": 1.8208795794843246e-07, + "logits/chosen": -0.764488160610199, + "logits/rejected": -0.7553139925003052, + "logps/chosen": -1.3095338344573975, + "logps/rejected": -1.6771752834320068, + "loss": 1.0957, + "rewards/accuracies": 0.7109375, + "rewards/chosen": -2.619067668914795, + "rewards/margins": 0.7352830171585083, + "rewards/rejected": -3.3543505668640137, + "step": 353 + }, + { + "epoch": 0.7484638255698711, + "grad_norm": 3.543769359588623, + "learning_rate": 1.7923729080568239e-07, + "logits/chosen": -0.7355642914772034, + "logits/rejected": -0.7744429707527161, + "logps/chosen": -1.3419017791748047, + "logps/rejected": -1.591749668121338, + "loss": 1.2579, + "rewards/accuracies": 0.6484375, + "rewards/chosen": -2.6838035583496094, + "rewards/margins": 0.4996955990791321, + "rewards/rejected": -3.183499336242676, + "step": 354 + }, + { + "epoch": 0.7505781301618765, + "grad_norm": 4.187947750091553, + "learning_rate": 1.764042332802506e-07, + "logits/chosen": -0.7009099721908569, + "logits/rejected": -0.6947562098503113, + "logps/chosen": -1.3167665004730225, + "logps/rejected": -1.640596866607666, + "loss": 1.2269, + "rewards/accuracies": 0.6171875, + "rewards/chosen": -2.633533000946045, + "rewards/margins": 0.6476608514785767, + "rewards/rejected": -3.281193733215332, + "step": 355 + }, + { + "epoch": 0.7526924347538817, + "grad_norm": 1.7813458442687988, + "learning_rate": 1.7358894090473924e-07, + "logits/chosen": -0.7276792526245117, + "logits/rejected": -0.7536065578460693, + "logps/chosen": -1.401429295539856, + "logps/rejected": -1.7458314895629883, + "loss": 1.1934, + "rewards/accuracies": 0.6875, + "rewards/chosen": -2.802858591079712, + "rewards/margins": 0.6888045072555542, + "rewards/rejected": -3.4916629791259766, + "step": 356 + }, + { + "epoch": 0.754806739345887, + "grad_norm": 2.3192296028137207, + "learning_rate": 1.7079156823645801e-07, + "logits/chosen": -0.6756848096847534, + "logits/rejected": -0.6988381743431091, + "logps/chosen": -1.36654531955719, + "logps/rejected": -1.6672351360321045, + "loss": 1.1928, + "rewards/accuracies": 0.671875, + "rewards/chosen": -2.73309063911438, + "rewards/margins": 0.6013798117637634, + "rewards/rejected": -3.334470272064209, + "step": 357 + }, + { + "epoch": 0.7569210439378923, + "grad_norm": 2.7722420692443848, + "learning_rate": 1.6801226884893893e-07, + "logits/chosen": -0.6857397556304932, + "logits/rejected": -0.7169467806816101, + "logps/chosen": -1.4047114849090576, + "logps/rejected": -1.733205795288086, + "loss": 1.16, + "rewards/accuracies": 0.6796875, + "rewards/chosen": -2.8094229698181152, + "rewards/margins": 0.6569885015487671, + "rewards/rejected": -3.466411590576172, + "step": 358 + }, + { + "epoch": 0.7590353485298976, + "grad_norm": 6.300495624542236, + "learning_rate": 1.6525119532350506e-07, + "logits/chosen": -0.7457281947135925, + "logits/rejected": -0.7319377660751343, + "logps/chosen": -1.282365083694458, + "logps/rejected": -1.6675825119018555, + "loss": 1.0742, + "rewards/accuracies": 0.7265625, + "rewards/chosen": -2.564730167388916, + "rewards/margins": 0.7704350352287292, + "rewards/rejected": -3.335165023803711, + "step": 359 + }, + { + "epoch": 0.7611496531219029, + "grad_norm": 3.5068228244781494, + "learning_rate": 1.6250849924089482e-07, + "logits/chosen": -0.7112680077552795, + "logits/rejected": -0.7166794538497925, + "logps/chosen": -1.3996254205703735, + "logps/rejected": -1.6635833978652954, + "loss": 1.2438, + "rewards/accuracies": 0.625, + "rewards/chosen": -2.799250841140747, + "rewards/margins": 0.5279159545898438, + "rewards/rejected": -3.327166795730591, + "step": 360 + }, + { + "epoch": 0.7632639577139082, + "grad_norm": 1.421538233757019, + "learning_rate": 1.5978433117293883e-07, + "logits/chosen": -0.7009663581848145, + "logits/rejected": -0.6878695487976074, + "logps/chosen": -1.4174691438674927, + "logps/rejected": -1.802457332611084, + "loss": 1.0885, + "rewards/accuracies": 0.7265625, + "rewards/chosen": -2.8349382877349854, + "rewards/margins": 0.7699761986732483, + "rewards/rejected": -3.604914665222168, + "step": 361 + }, + { + "epoch": 0.7653782623059134, + "grad_norm": 3.2645766735076904, + "learning_rate": 1.5707884067429471e-07, + "logits/chosen": -0.6865817904472351, + "logits/rejected": -0.7084690928459167, + "logps/chosen": -1.377517819404602, + "logps/rejected": -1.7079989910125732, + "loss": 1.2371, + "rewards/accuracies": 0.640625, + "rewards/chosen": -2.755035638809204, + "rewards/margins": 0.660962700843811, + "rewards/rejected": -3.4159979820251465, + "step": 362 + }, + { + "epoch": 0.7674925668979188, + "grad_norm": 1.973783254623413, + "learning_rate": 1.5439217627423695e-07, + "logits/chosen": -0.7317283153533936, + "logits/rejected": -0.7571225166320801, + "logps/chosen": -1.63040030002594, + "logps/rejected": -2.027442216873169, + "loss": 1.1614, + "rewards/accuracies": 0.671875, + "rewards/chosen": -3.26080060005188, + "rewards/margins": 0.7940834760665894, + "rewards/rejected": -4.054884433746338, + "step": 363 + }, + { + "epoch": 0.769606871489924, + "grad_norm": 4.545448303222656, + "learning_rate": 1.5172448546850163e-07, + "logits/chosen": -0.6746503710746765, + "logits/rejected": -0.7073549628257751, + "logps/chosen": -1.321073055267334, + "logps/rejected": -1.6741642951965332, + "loss": 1.1609, + "rewards/accuracies": 0.6796875, + "rewards/chosen": -2.642146110534668, + "rewards/margins": 0.7061826586723328, + "rewards/rejected": -3.3483285903930664, + "step": 364 + }, + { + "epoch": 0.7717211760819292, + "grad_norm": 8.678997039794922, + "learning_rate": 1.490759147111894e-07, + "logits/chosen": -0.6089351773262024, + "logits/rejected": -0.6172072291374207, + "logps/chosen": -1.6598318815231323, + "logps/rejected": -1.9151239395141602, + "loss": 1.2762, + "rewards/accuracies": 0.6484375, + "rewards/chosen": -3.3196637630462646, + "rewards/margins": 0.5105838775634766, + "rewards/rejected": -3.8302478790283203, + "step": 365 + }, + { + "epoch": 0.7738354806739346, + "grad_norm": 3.29367733001709, + "learning_rate": 1.4644660940672627e-07, + "logits/chosen": -0.6255152821540833, + "logits/rejected": -0.6178345680236816, + "logps/chosen": -1.7635339498519897, + "logps/rejected": -2.02409029006958, + "loss": 1.4469, + "rewards/accuracies": 0.6171875, + "rewards/chosen": -3.5270678997039795, + "rewards/margins": 0.5211121439933777, + "rewards/rejected": -4.04818058013916, + "step": 366 + }, + { + "epoch": 0.7759497852659398, + "grad_norm": 3.500715732574463, + "learning_rate": 1.438367139018796e-07, + "logits/chosen": -0.6738446354866028, + "logits/rejected": -0.671849250793457, + "logps/chosen": -1.603959560394287, + "logps/rejected": -2.140998363494873, + "loss": 0.9771, + "rewards/accuracies": 0.703125, + "rewards/chosen": -3.207919120788574, + "rewards/margins": 1.0740783214569092, + "rewards/rejected": -4.281996726989746, + "step": 367 + }, + { + "epoch": 0.7780640898579452, + "grad_norm": 2.8842501640319824, + "learning_rate": 1.412463714778343e-07, + "logits/chosen": -0.6544129252433777, + "logits/rejected": -0.6667245030403137, + "logps/chosen": -1.7409751415252686, + "logps/rejected": -2.1441538333892822, + "loss": 1.1043, + "rewards/accuracies": 0.7265625, + "rewards/chosen": -3.481950283050537, + "rewards/margins": 0.806357741355896, + "rewards/rejected": -4.2883076667785645, + "step": 368 + }, + { + "epoch": 0.7801783944499504, + "grad_norm": 3.7606077194213867, + "learning_rate": 1.3867572434232728e-07, + "logits/chosen": -0.6620441675186157, + "logits/rejected": -0.6536539793014526, + "logps/chosen": -1.6755543947219849, + "logps/rejected": -2.012425184249878, + "loss": 1.2249, + "rewards/accuracies": 0.6796875, + "rewards/chosen": -3.3511087894439697, + "rewards/margins": 0.6737421154975891, + "rewards/rejected": -4.024850368499756, + "step": 369 + }, + { + "epoch": 0.7822926990419558, + "grad_norm": 3.284456729888916, + "learning_rate": 1.3612491362183887e-07, + "logits/chosen": -0.6353476047515869, + "logits/rejected": -0.6363587975502014, + "logps/chosen": -1.6001538038253784, + "logps/rejected": -2.0670526027679443, + "loss": 1.0746, + "rewards/accuracies": 0.6796875, + "rewards/chosen": -3.200307607650757, + "rewards/margins": 0.9337971210479736, + "rewards/rejected": -4.134105205535889, + "step": 370 + }, + { + "epoch": 0.784407003633961, + "grad_norm": 1.9063444137573242, + "learning_rate": 1.3359407935384642e-07, + "logits/chosen": -0.6120063662528992, + "logits/rejected": -0.5794797539710999, + "logps/chosen": -1.4489734172821045, + "logps/rejected": -1.9216854572296143, + "loss": 1.0928, + "rewards/accuracies": 0.71875, + "rewards/chosen": -2.897946834564209, + "rewards/margins": 0.9454240798950195, + "rewards/rejected": -3.8433709144592285, + "step": 371 + }, + { + "epoch": 0.7865213082259663, + "grad_norm": 6.973724842071533, + "learning_rate": 1.3108336047913633e-07, + "logits/chosen": -0.6082984209060669, + "logits/rejected": -0.6162828207015991, + "logps/chosen": -1.7623229026794434, + "logps/rejected": -2.239130735397339, + "loss": 1.2665, + "rewards/accuracies": 0.59375, + "rewards/chosen": -3.5246458053588867, + "rewards/margins": 0.9536150693893433, + "rewards/rejected": -4.478261470794678, + "step": 372 + }, + { + "epoch": 0.7886356128179716, + "grad_norm": 3.874128580093384, + "learning_rate": 1.2859289483417557e-07, + "logits/chosen": -0.5540960431098938, + "logits/rejected": -0.5091680884361267, + "logps/chosen": -1.85587739944458, + "logps/rejected": -2.3959312438964844, + "loss": 1.0672, + "rewards/accuracies": 0.6796875, + "rewards/chosen": -3.71175479888916, + "rewards/margins": 1.0801074504852295, + "rewards/rejected": -4.791862487792969, + "step": 373 + }, + { + "epoch": 0.7907499174099769, + "grad_norm": 13.771154403686523, + "learning_rate": 1.261228191435445e-07, + "logits/chosen": -0.599963903427124, + "logits/rejected": -0.5765703916549683, + "logps/chosen": -1.7974251508712769, + "logps/rejected": -2.2272088527679443, + "loss": 1.1994, + "rewards/accuracies": 0.640625, + "rewards/chosen": -3.5948503017425537, + "rewards/margins": 0.8595672249794006, + "rewards/rejected": -4.454417705535889, + "step": 374 + }, + { + "epoch": 0.7928642220019821, + "grad_norm": 2.5084969997406006, + "learning_rate": 1.2367326901243214e-07, + "logits/chosen": -0.5945304036140442, + "logits/rejected": -0.6021737456321716, + "logps/chosen": -1.9855573177337646, + "logps/rejected": -2.3953022956848145, + "loss": 1.2576, + "rewards/accuracies": 0.6640625, + "rewards/chosen": -3.9711146354675293, + "rewards/margins": 0.8194906711578369, + "rewards/rejected": -4.790604591369629, + "step": 375 + }, + { + "epoch": 0.7949785265939875, + "grad_norm": 4.571497440338135, + "learning_rate": 1.2124437891918993e-07, + "logits/chosen": -0.5888144373893738, + "logits/rejected": -0.5575076937675476, + "logps/chosen": -1.8334908485412598, + "logps/rejected": -2.153212070465088, + "loss": 1.2104, + "rewards/accuracies": 0.640625, + "rewards/chosen": -3.6669816970825195, + "rewards/margins": 0.639442503452301, + "rewards/rejected": -4.306424140930176, + "step": 376 + }, + { + "epoch": 0.7970928311859927, + "grad_norm": 5.023235321044922, + "learning_rate": 1.1883628220795005e-07, + "logits/chosen": -0.632038414478302, + "logits/rejected": -0.6368271708488464, + "logps/chosen": -1.8573570251464844, + "logps/rejected": -2.291320323944092, + "loss": 1.1719, + "rewards/accuracies": 0.6640625, + "rewards/chosen": -3.7147140502929688, + "rewards/margins": 0.8679270148277283, + "rewards/rejected": -4.582640647888184, + "step": 377 + }, + { + "epoch": 0.7992071357779981, + "grad_norm": 4.98567533493042, + "learning_rate": 1.1644911108130434e-07, + "logits/chosen": -0.5647228360176086, + "logits/rejected": -0.5541558265686035, + "logps/chosen": -1.8232372999191284, + "logps/rejected": -2.2992348670959473, + "loss": 1.1476, + "rewards/accuracies": 0.6953125, + "rewards/chosen": -3.646474599838257, + "rewards/margins": 0.9519950747489929, + "rewards/rejected": -4.5984697341918945, + "step": 378 + }, + { + "epoch": 0.8013214403700033, + "grad_norm": 9.514540672302246, + "learning_rate": 1.1408299659304682e-07, + "logits/chosen": -0.5385195016860962, + "logits/rejected": -0.5475942492485046, + "logps/chosen": -2.077877998352051, + "logps/rejected": -2.4877052307128906, + "loss": 1.1605, + "rewards/accuracies": 0.7265625, + "rewards/chosen": -4.155755996704102, + "rewards/margins": 0.8196545243263245, + "rewards/rejected": -4.975410461425781, + "step": 379 + }, + { + "epoch": 0.8034357449620085, + "grad_norm": 7.652558326721191, + "learning_rate": 1.1173806864097884e-07, + "logits/chosen": -0.5651392936706543, + "logits/rejected": -0.5097556114196777, + "logps/chosen": -1.9452521800994873, + "logps/rejected": -2.376047134399414, + "loss": 1.2004, + "rewards/accuracies": 0.6796875, + "rewards/chosen": -3.8905043601989746, + "rewards/margins": 0.8615895509719849, + "rewards/rejected": -4.752094268798828, + "step": 380 + }, + { + "epoch": 0.8055500495540139, + "grad_norm": 6.184218406677246, + "learning_rate": 1.0941445595977766e-07, + "logits/chosen": -0.5738644599914551, + "logits/rejected": -0.570101797580719, + "logps/chosen": -2.0233359336853027, + "logps/rejected": -2.5829384326934814, + "loss": 1.1539, + "rewards/accuracies": 0.71875, + "rewards/chosen": -4.0466718673706055, + "rewards/margins": 1.1192048788070679, + "rewards/rejected": -5.165876865386963, + "step": 381 + }, + { + "epoch": 0.8076643541460191, + "grad_norm": 4.697547435760498, + "learning_rate": 1.0711228611392936e-07, + "logits/chosen": -0.5766915082931519, + "logits/rejected": -0.5619411468505859, + "logps/chosen": -2.0546395778656006, + "logps/rejected": -2.4459054470062256, + "loss": 1.2723, + "rewards/accuracies": 0.59375, + "rewards/chosen": -4.109279155731201, + "rewards/margins": 0.7825320959091187, + "rewards/rejected": -4.891810894012451, + "step": 382 + }, + { + "epoch": 0.8097786587380245, + "grad_norm": 5.595128536224365, + "learning_rate": 1.0483168549072518e-07, + "logits/chosen": -0.6808648109436035, + "logits/rejected": -0.6518751382827759, + "logps/chosen": -1.9909974336624146, + "logps/rejected": -2.3775596618652344, + "loss": 1.2501, + "rewards/accuracies": 0.6640625, + "rewards/chosen": -3.981994867324829, + "rewards/margins": 0.7731241583824158, + "rewards/rejected": -4.755119323730469, + "step": 383 + }, + { + "epoch": 0.8118929633300297, + "grad_norm": 3.6460607051849365, + "learning_rate": 1.0257277929332331e-07, + "logits/chosen": -0.6901826858520508, + "logits/rejected": -0.703309953212738, + "logps/chosen": -1.9317903518676758, + "logps/rejected": -2.322279930114746, + "loss": 1.1945, + "rewards/accuracies": 0.65625, + "rewards/chosen": -3.8635807037353516, + "rewards/margins": 0.780979335308075, + "rewards/rejected": -4.644559860229492, + "step": 384 + }, + { + "epoch": 0.814007267922035, + "grad_norm": 8.366463661193848, + "learning_rate": 1.0033569153387561e-07, + "logits/chosen": -0.5720599293708801, + "logits/rejected": -0.5666248798370361, + "logps/chosen": -1.9946173429489136, + "logps/rejected": -2.3951826095581055, + "loss": 1.3349, + "rewards/accuracies": 0.546875, + "rewards/chosen": -3.989234685897827, + "rewards/margins": 0.8011305332183838, + "rewards/rejected": -4.790365219116211, + "step": 385 + }, + { + "epoch": 0.8161215725140403, + "grad_norm": 1.8578377962112427, + "learning_rate": 9.812054502671834e-08, + "logits/chosen": -0.6122175455093384, + "logits/rejected": -0.5665942430496216, + "logps/chosen": -2.1414878368377686, + "logps/rejected": -2.646432399749756, + "loss": 1.1834, + "rewards/accuracies": 0.7109375, + "rewards/chosen": -4.282975673675537, + "rewards/margins": 1.009889006614685, + "rewards/rejected": -5.292864799499512, + "step": 386 + }, + { + "epoch": 0.8182358771060456, + "grad_norm": 4.7323408126831055, + "learning_rate": 9.592746138163093e-08, + "logits/chosen": -0.5390607118606567, + "logits/rejected": -0.5227072834968567, + "logps/chosen": -2.1249067783355713, + "logps/rejected": -2.688115119934082, + "loss": 1.2211, + "rewards/accuracies": 0.671875, + "rewards/chosen": -4.249813556671143, + "rewards/margins": 1.1264164447784424, + "rewards/rejected": -5.376230239868164, + "step": 387 + }, + { + "epoch": 0.8203501816980509, + "grad_norm": 2.5557284355163574, + "learning_rate": 9.375656099715934e-08, + "logits/chosen": -0.5654515027999878, + "logits/rejected": -0.5636597275733948, + "logps/chosen": -2.126107692718506, + "logps/rejected": -2.606684684753418, + "loss": 1.1995, + "rewards/accuracies": 0.6484375, + "rewards/chosen": -4.252215385437012, + "rewards/margins": 0.9611539244651794, + "rewards/rejected": -5.213369369506836, + "step": 388 + }, + { + "epoch": 0.8224644862900562, + "grad_norm": 4.177574634552002, + "learning_rate": 9.160796305400609e-08, + "logits/chosen": -0.6432445645332336, + "logits/rejected": -0.6587055921554565, + "logps/chosen": -2.0785441398620605, + "logps/rejected": -2.4507219791412354, + "loss": 1.2339, + "rewards/accuracies": 0.6328125, + "rewards/chosen": -4.157088279724121, + "rewards/margins": 0.7443561553955078, + "rewards/rejected": -4.901443958282471, + "step": 389 + }, + { + "epoch": 0.8245787908820614, + "grad_norm": 5.901131629943848, + "learning_rate": 8.9481785508487e-08, + "logits/chosen": -0.588135302066803, + "logits/rejected": -0.5850880742073059, + "logps/chosen": -2.225466251373291, + "logps/rejected": -2.638160467147827, + "loss": 1.2255, + "rewards/accuracies": 0.6328125, + "rewards/chosen": -4.450932502746582, + "rewards/margins": 0.8253881335258484, + "rewards/rejected": -5.276320934295654, + "step": 390 + }, + { + "epoch": 0.8266930954740668, + "grad_norm": 2.727555751800537, + "learning_rate": 8.737814508605673e-08, + "logits/chosen": -0.5863823294639587, + "logits/rejected": -0.590294599533081, + "logps/chosen": -1.9851064682006836, + "logps/rejected": -2.579831600189209, + "loss": 1.0113, + "rewards/accuracies": 0.6953125, + "rewards/chosen": -3.970212936401367, + "rewards/margins": 1.1894500255584717, + "rewards/rejected": -5.159663200378418, + "step": 391 + }, + { + "epoch": 0.828807400066072, + "grad_norm": 9.048048973083496, + "learning_rate": 8.529715727489912e-08, + "logits/chosen": -0.5600543022155762, + "logits/rejected": -0.5537065267562866, + "logps/chosen": -1.9846975803375244, + "logps/rejected": -2.2676990032196045, + "loss": 1.3045, + "rewards/accuracies": 0.6328125, + "rewards/chosen": -3.969395160675049, + "rewards/margins": 0.5660032629966736, + "rewards/rejected": -4.535398006439209, + "step": 392 + }, + { + "epoch": 0.8309217046580774, + "grad_norm": 3.4390201568603516, + "learning_rate": 8.323893631958806e-08, + "logits/chosen": -0.6335893273353577, + "logits/rejected": -0.6190727949142456, + "logps/chosen": -1.908363938331604, + "logps/rejected": -2.510305166244507, + "loss": 1.0262, + "rewards/accuracies": 0.6875, + "rewards/chosen": -3.816727876663208, + "rewards/margins": 1.2038825750350952, + "rewards/rejected": -5.020610332489014, + "step": 393 + }, + { + "epoch": 0.8330360092500826, + "grad_norm": 5.347372531890869, + "learning_rate": 8.120359521481501e-08, + "logits/chosen": -0.6408874988555908, + "logits/rejected": -0.643690288066864, + "logps/chosen": -2.019606828689575, + "logps/rejected": -2.3068103790283203, + "loss": 1.3028, + "rewards/accuracies": 0.640625, + "rewards/chosen": -4.03921365737915, + "rewards/margins": 0.574406623840332, + "rewards/rejected": -4.613620758056641, + "step": 394 + }, + { + "epoch": 0.8351503138420878, + "grad_norm": 2.2186920642852783, + "learning_rate": 7.9191245699186e-08, + "logits/chosen": -0.7156819105148315, + "logits/rejected": -0.6814436316490173, + "logps/chosen": -2.108549118041992, + "logps/rejected": -2.608646869659424, + "loss": 1.2948, + "rewards/accuracies": 0.6328125, + "rewards/chosen": -4.217098236083984, + "rewards/margins": 1.0001959800720215, + "rewards/rejected": -5.217293739318848, + "step": 395 + }, + { + "epoch": 0.8372646184340932, + "grad_norm": 2.6448726654052734, + "learning_rate": 7.720199824908691e-08, + "logits/chosen": -0.5753149390220642, + "logits/rejected": -0.6065633296966553, + "logps/chosen": -2.2337100505828857, + "logps/rejected": -2.6677160263061523, + "loss": 1.2273, + "rewards/accuracies": 0.625, + "rewards/chosen": -4.4674201011657715, + "rewards/margins": 0.868012011051178, + "rewards/rejected": -5.335432052612305, + "step": 396 + }, + { + "epoch": 0.8393789230260984, + "grad_norm": 6.596648216247559, + "learning_rate": 7.523596207261907e-08, + "logits/chosen": -0.5432775616645813, + "logits/rejected": -0.4928567409515381, + "logps/chosen": -2.1113924980163574, + "logps/rejected": -2.482846975326538, + "loss": 1.319, + "rewards/accuracies": 0.625, + "rewards/chosen": -4.222784996032715, + "rewards/margins": 0.7429092526435852, + "rewards/rejected": -4.965693950653076, + "step": 397 + }, + { + "epoch": 0.8414932276181037, + "grad_norm": 3.9646811485290527, + "learning_rate": 7.329324510360269e-08, + "logits/chosen": -0.5816119909286499, + "logits/rejected": -0.564030110836029, + "logps/chosen": -2.0296411514282227, + "logps/rejected": -2.5152456760406494, + "loss": 1.1645, + "rewards/accuracies": 0.703125, + "rewards/chosen": -4.059282302856445, + "rewards/margins": 0.9712092876434326, + "rewards/rejected": -5.030491352081299, + "step": 398 + }, + { + "epoch": 0.843607532210109, + "grad_norm": 2.7787463665008545, + "learning_rate": 7.137395399565249e-08, + "logits/chosen": -0.6342184543609619, + "logits/rejected": -0.6318203210830688, + "logps/chosen": -2.0209803581237793, + "logps/rejected": -2.5250658988952637, + "loss": 1.1822, + "rewards/accuracies": 0.671875, + "rewards/chosen": -4.041960716247559, + "rewards/margins": 1.0081708431243896, + "rewards/rejected": -5.050131797790527, + "step": 399 + }, + { + "epoch": 0.8457218368021143, + "grad_norm": 4.476524353027344, + "learning_rate": 6.947819411632222e-08, + "logits/chosen": -0.5809480547904968, + "logits/rejected": -0.5740150213241577, + "logps/chosen": -1.9072691202163696, + "logps/rejected": -2.2942898273468018, + "loss": 1.3214, + "rewards/accuracies": 0.6640625, + "rewards/chosen": -3.8145382404327393, + "rewards/margins": 0.7740417718887329, + "rewards/rejected": -4.5885796546936035, + "step": 400 + }, + { + "epoch": 0.8478361413941196, + "grad_norm": 2.47866153717041, + "learning_rate": 6.760606954131965e-08, + "logits/chosen": -0.5540263652801514, + "logits/rejected": -0.5378059148788452, + "logps/chosen": -1.8337305784225464, + "logps/rejected": -2.264974594116211, + "loss": 1.2396, + "rewards/accuracies": 0.671875, + "rewards/chosen": -3.6674611568450928, + "rewards/margins": 0.8624882698059082, + "rewards/rejected": -4.529949188232422, + "step": 401 + }, + { + "epoch": 0.8499504459861249, + "grad_norm": 2.800645112991333, + "learning_rate": 6.575768304879292e-08, + "logits/chosen": -0.6384072303771973, + "logits/rejected": -0.6310533285140991, + "logps/chosen": -1.9723026752471924, + "logps/rejected": -2.3342039585113525, + "loss": 1.2746, + "rewards/accuracies": 0.6171875, + "rewards/chosen": -3.9446053504943848, + "rewards/margins": 0.7238021492958069, + "rewards/rejected": -4.668407917022705, + "step": 402 + }, + { + "epoch": 0.8520647505781301, + "grad_norm": 2.794485092163086, + "learning_rate": 6.3933136113689e-08, + "logits/chosen": -0.7269207239151001, + "logits/rejected": -0.7003817558288574, + "logps/chosen": -1.8535553216934204, + "logps/rejected": -2.2630820274353027, + "loss": 1.1774, + "rewards/accuracies": 0.765625, + "rewards/chosen": -3.707110643386841, + "rewards/margins": 0.8190534114837646, + "rewards/rejected": -4.5261640548706055, + "step": 403 + }, + { + "epoch": 0.8541790551701355, + "grad_norm": 12.197257041931152, + "learning_rate": 6.213252890218162e-08, + "logits/chosen": -0.5296715497970581, + "logits/rejected": -0.5422269105911255, + "logps/chosen": -1.8217012882232666, + "logps/rejected": -2.3873071670532227, + "loss": 1.1467, + "rewards/accuracies": 0.6328125, + "rewards/chosen": -3.643402576446533, + "rewards/margins": 1.1312119960784912, + "rewards/rejected": -4.774614334106445, + "step": 404 + }, + { + "epoch": 0.8562933597621407, + "grad_norm": 2.396972179412842, + "learning_rate": 6.03559602661729e-08, + "logits/chosen": -0.6527739763259888, + "logits/rejected": -0.645389199256897, + "logps/chosen": -1.9720454216003418, + "logps/rejected": -2.2900233268737793, + "loss": 1.3423, + "rewards/accuracies": 0.671875, + "rewards/chosen": -3.9440908432006836, + "rewards/margins": 0.6359554529190063, + "rewards/rejected": -4.580046653747559, + "step": 405 + }, + { + "epoch": 0.8584076643541461, + "grad_norm": 3.5759809017181396, + "learning_rate": 5.8603527737866307e-08, + "logits/chosen": -0.5955278277397156, + "logits/rejected": -0.583007276058197, + "logps/chosen": -1.835761547088623, + "logps/rejected": -2.2889809608459473, + "loss": 1.1015, + "rewards/accuracies": 0.6484375, + "rewards/chosen": -3.671523094177246, + "rewards/margins": 0.9064393639564514, + "rewards/rejected": -4.5779619216918945, + "step": 406 + }, + { + "epoch": 0.8605219689461513, + "grad_norm": 8.514383316040039, + "learning_rate": 5.687532752441232e-08, + "logits/chosen": -0.6325979828834534, + "logits/rejected": -0.5895124077796936, + "logps/chosen": -2.0668628215789795, + "logps/rejected": -2.4919605255126953, + "loss": 1.2469, + "rewards/accuracies": 0.640625, + "rewards/chosen": -4.133725643157959, + "rewards/margins": 0.8501947522163391, + "rewards/rejected": -4.983921051025391, + "step": 407 + }, + { + "epoch": 0.8626362735381566, + "grad_norm": 2.7234861850738525, + "learning_rate": 5.517145450262639e-08, + "logits/chosen": -0.5355826616287231, + "logits/rejected": -0.5421631932258606, + "logps/chosen": -1.8649351596832275, + "logps/rejected": -2.5664312839508057, + "loss": 1.0119, + "rewards/accuracies": 0.7265625, + "rewards/chosen": -3.729870319366455, + "rewards/margins": 1.4029927253723145, + "rewards/rejected": -5.132862567901611, + "step": 408 + }, + { + "epoch": 0.8647505781301619, + "grad_norm": 3.1693661212921143, + "learning_rate": 5.3492002213780754e-08, + "logits/chosen": -0.5687247514724731, + "logits/rejected": -0.5579267740249634, + "logps/chosen": -2.0369410514831543, + "logps/rejected": -2.4640278816223145, + "loss": 1.311, + "rewards/accuracies": 0.640625, + "rewards/chosen": -4.073882102966309, + "rewards/margins": 0.8541740775108337, + "rewards/rejected": -4.928055763244629, + "step": 409 + }, + { + "epoch": 0.8668648827221671, + "grad_norm": 1.8922606706619263, + "learning_rate": 5.183706285846873e-08, + "logits/chosen": -0.6247987151145935, + "logits/rejected": -0.6043509244918823, + "logps/chosen": -1.8121845722198486, + "logps/rejected": -2.2492425441741943, + "loss": 1.1291, + "rewards/accuracies": 0.671875, + "rewards/chosen": -3.6243691444396973, + "rewards/margins": 0.8741158843040466, + "rewards/rejected": -4.498485088348389, + "step": 410 + }, + { + "epoch": 0.8689791873141725, + "grad_norm": 5.305470943450928, + "learning_rate": 5.020672729154307e-08, + "logits/chosen": -0.5554785132408142, + "logits/rejected": -0.565819501876831, + "logps/chosen": -1.9100950956344604, + "logps/rejected": -2.4060237407684326, + "loss": 1.1576, + "rewards/accuracies": 0.640625, + "rewards/chosen": -3.820190191268921, + "rewards/margins": 0.9918570518493652, + "rewards/rejected": -4.812047481536865, + "step": 411 + }, + { + "epoch": 0.8710934919061777, + "grad_norm": 3.2367563247680664, + "learning_rate": 4.860108501712823e-08, + "logits/chosen": -0.6536320447921753, + "logits/rejected": -0.6901589035987854, + "logps/chosen": -1.9213619232177734, + "logps/rejected": -2.270475387573242, + "loss": 1.2711, + "rewards/accuracies": 0.703125, + "rewards/chosen": -3.842723846435547, + "rewards/margins": 0.6982269287109375, + "rewards/rejected": -4.540950775146484, + "step": 412 + }, + { + "epoch": 0.873207796498183, + "grad_norm": 4.2919135093688965, + "learning_rate": 4.7020224183706715e-08, + "logits/chosen": -0.7220910787582397, + "logits/rejected": -0.7015137672424316, + "logps/chosen": -1.7745577096939087, + "logps/rejected": -2.2485008239746094, + "loss": 1.0773, + "rewards/accuracies": 0.7421875, + "rewards/chosen": -3.5491154193878174, + "rewards/margins": 0.9478861093521118, + "rewards/rejected": -4.497001647949219, + "step": 413 + }, + { + "epoch": 0.8753221010901883, + "grad_norm": 6.373754501342773, + "learning_rate": 4.54642315792792e-08, + "logits/chosen": -0.6177189946174622, + "logits/rejected": -0.6176800727844238, + "logps/chosen": -1.8406522274017334, + "logps/rejected": -2.368619441986084, + "loss": 1.1582, + "rewards/accuracies": 0.6640625, + "rewards/chosen": -3.681304454803467, + "rewards/margins": 1.0559337139129639, + "rewards/rejected": -4.737238883972168, + "step": 414 + }, + { + "epoch": 0.8774364056821936, + "grad_norm": 4.120994567871094, + "learning_rate": 4.3933192626600725e-08, + "logits/chosen": -0.5981518626213074, + "logits/rejected": -0.5846447348594666, + "logps/chosen": -1.9437062740325928, + "logps/rejected": -2.4175901412963867, + "loss": 1.1865, + "rewards/accuracies": 0.6875, + "rewards/chosen": -3.8874125480651855, + "rewards/margins": 0.9477680921554565, + "rewards/rejected": -4.835180282592773, + "step": 415 + }, + { + "epoch": 0.8795507102741988, + "grad_norm": 3.618441104888916, + "learning_rate": 4.242719137849077e-08, + "logits/chosen": -0.544365644454956, + "logits/rejected": -0.5385901927947998, + "logps/chosen": -1.8662028312683105, + "logps/rejected": -2.2550435066223145, + "loss": 1.2125, + "rewards/accuracies": 0.625, + "rewards/chosen": -3.732405662536621, + "rewards/margins": 0.7776816487312317, + "rewards/rejected": -4.510087013244629, + "step": 416 + }, + { + "epoch": 0.8816650148662042, + "grad_norm": 8.518675804138184, + "learning_rate": 4.0946310513218726e-08, + "logits/chosen": -0.6048115491867065, + "logits/rejected": -0.5681714415550232, + "logps/chosen": -2.020745038986206, + "logps/rejected": -2.5642106533050537, + "loss": 1.1682, + "rewards/accuracies": 0.7109375, + "rewards/chosen": -4.041490077972412, + "rewards/margins": 1.0869308710098267, + "rewards/rejected": -5.128421306610107, + "step": 417 + }, + { + "epoch": 0.8837793194582094, + "grad_norm": 4.693824768066406, + "learning_rate": 3.9490631329964554e-08, + "logits/chosen": -0.5653468370437622, + "logits/rejected": -0.5610933303833008, + "logps/chosen": -1.8477216958999634, + "logps/rejected": -2.280613660812378, + "loss": 1.2177, + "rewards/accuracies": 0.640625, + "rewards/chosen": -3.6954433917999268, + "rewards/margins": 0.8657836318016052, + "rewards/rejected": -4.561227321624756, + "step": 418 + }, + { + "epoch": 0.8858936240502148, + "grad_norm": 4.910251617431641, + "learning_rate": 3.806023374435663e-08, + "logits/chosen": -0.6456243991851807, + "logits/rejected": -0.6571968197822571, + "logps/chosen": -1.8414027690887451, + "logps/rejected": -2.2380261421203613, + "loss": 1.2081, + "rewards/accuracies": 0.6328125, + "rewards/chosen": -3.6828055381774902, + "rewards/margins": 0.7932465076446533, + "rewards/rejected": -4.476052284240723, + "step": 419 + }, + { + "epoch": 0.88800792864222, + "grad_norm": 2.260300636291504, + "learning_rate": 3.665519628408331e-08, + "logits/chosen": -0.6023683547973633, + "logits/rejected": -0.6400430798530579, + "logps/chosen": -2.039283275604248, + "logps/rejected": -2.520536184310913, + "loss": 1.1629, + "rewards/accuracies": 0.671875, + "rewards/chosen": -4.078566551208496, + "rewards/margins": 0.962505042552948, + "rewards/rejected": -5.041072368621826, + "step": 420 + }, + { + "epoch": 0.8901222332342253, + "grad_norm": 2.411315679550171, + "learning_rate": 3.527559608458225e-08, + "logits/chosen": -0.6408150792121887, + "logits/rejected": -0.6065229177474976, + "logps/chosen": -1.91830313205719, + "logps/rejected": -2.378871440887451, + "loss": 1.1848, + "rewards/accuracies": 0.6796875, + "rewards/chosen": -3.83660626411438, + "rewards/margins": 0.9211370944976807, + "rewards/rejected": -4.757742881774902, + "step": 421 + }, + { + "epoch": 0.8922365378262306, + "grad_norm": 8.43724250793457, + "learning_rate": 3.39215088848061e-08, + "logits/chosen": -0.5962439179420471, + "logits/rejected": -0.5975909233093262, + "logps/chosen": -1.9837861061096191, + "logps/rejected": -2.319769859313965, + "loss": 1.2026, + "rewards/accuracies": 0.6953125, + "rewards/chosen": -3.9675722122192383, + "rewards/margins": 0.6719677448272705, + "rewards/rejected": -4.63953971862793, + "step": 422 + }, + { + "epoch": 0.8943508424182359, + "grad_norm": 2.261178731918335, + "learning_rate": 3.259300902306367e-08, + "logits/chosen": -0.6858331561088562, + "logits/rejected": -0.7034648060798645, + "logps/chosen": -1.8496602773666382, + "logps/rejected": -2.3583877086639404, + "loss": 1.1137, + "rewards/accuracies": 0.734375, + "rewards/chosen": -3.6993205547332764, + "rewards/margins": 1.0174546241760254, + "rewards/rejected": -4.716775417327881, + "step": 423 + }, + { + "epoch": 0.8964651470102412, + "grad_norm": 7.621473789215088, + "learning_rate": 3.129016943293955e-08, + "logits/chosen": -0.6037753224372864, + "logits/rejected": -0.5865834355354309, + "logps/chosen": -1.902024507522583, + "logps/rejected": -2.3152518272399902, + "loss": 1.2577, + "rewards/accuracies": 0.6328125, + "rewards/chosen": -3.804049015045166, + "rewards/margins": 0.8264546394348145, + "rewards/rejected": -4.6305036544799805, + "step": 424 + }, + { + "epoch": 0.8985794516022465, + "grad_norm": 2.954953908920288, + "learning_rate": 3.001306163928985e-08, + "logits/chosen": -0.6682695746421814, + "logits/rejected": -0.6516857147216797, + "logps/chosen": -2.0923025608062744, + "logps/rejected": -2.4602210521698, + "loss": 1.3758, + "rewards/accuracies": 0.5859375, + "rewards/chosen": -4.184605121612549, + "rewards/margins": 0.7358372211456299, + "rewards/rejected": -4.9204421043396, + "step": 425 + }, + { + "epoch": 0.9006937561942517, + "grad_norm": 4.746059894561768, + "learning_rate": 2.8761755754315663e-08, + "logits/chosen": -0.6213058829307556, + "logits/rejected": -0.6071665287017822, + "logps/chosen": -1.9309402704238892, + "logps/rejected": -2.3048858642578125, + "loss": 1.2216, + "rewards/accuracies": 0.6328125, + "rewards/chosen": -3.8618805408477783, + "rewards/margins": 0.7478916049003601, + "rewards/rejected": -4.609771728515625, + "step": 426 + }, + { + "epoch": 0.902808060786257, + "grad_norm": 3.4567902088165283, + "learning_rate": 2.753632047371335e-08, + "logits/chosen": -0.5602300763130188, + "logits/rejected": -0.5994393825531006, + "logps/chosen": -2.0382192134857178, + "logps/rejected": -2.4620015621185303, + "loss": 1.1534, + "rewards/accuracies": 0.6875, + "rewards/chosen": -4.0764384269714355, + "rewards/margins": 0.8475649952888489, + "rewards/rejected": -4.9240031242370605, + "step": 427 + }, + { + "epoch": 0.9049223653782623, + "grad_norm": 8.650147438049316, + "learning_rate": 2.63368230729043e-08, + "logits/chosen": -0.6574521660804749, + "logits/rejected": -0.6474560499191284, + "logps/chosen": -2.01283860206604, + "logps/rejected": -2.3451762199401855, + "loss": 1.3337, + "rewards/accuracies": 0.625, + "rewards/chosen": -4.02567720413208, + "rewards/margins": 0.6646751165390015, + "rewards/rejected": -4.690352439880371, + "step": 428 + }, + { + "epoch": 0.9070366699702676, + "grad_norm": 4.965768337249756, + "learning_rate": 2.5163329403340593e-08, + "logits/chosen": -0.632398784160614, + "logits/rejected": -0.6226595640182495, + "logps/chosen": -1.9954252243041992, + "logps/rejected": -2.415121555328369, + "loss": 1.1249, + "rewards/accuracies": 0.6484375, + "rewards/chosen": -3.9908504486083984, + "rewards/margins": 0.8393926024436951, + "rewards/rejected": -4.830243110656738, + "step": 429 + }, + { + "epoch": 0.9091509745622729, + "grad_norm": 4.165818214416504, + "learning_rate": 2.4015903888890242e-08, + "logits/chosen": -0.6372086405754089, + "logits/rejected": -0.6573516130447388, + "logps/chosen": -1.9238042831420898, + "logps/rejected": -2.3672964572906494, + "loss": 1.1372, + "rewards/accuracies": 0.65625, + "rewards/chosen": -3.8476085662841797, + "rewards/margins": 0.8869843482971191, + "rewards/rejected": -4.734592914581299, + "step": 430 + }, + { + "epoch": 0.9112652791542781, + "grad_norm": 4.025818347930908, + "learning_rate": 2.289460952230038e-08, + "logits/chosen": -0.6017577648162842, + "logits/rejected": -0.5835919380187988, + "logps/chosen": -1.9263951778411865, + "logps/rejected": -2.364337921142578, + "loss": 1.1519, + "rewards/accuracies": 0.7265625, + "rewards/chosen": -3.852790355682373, + "rewards/margins": 0.875885009765625, + "rewards/rejected": -4.728675842285156, + "step": 431 + }, + { + "epoch": 0.9133795837462835, + "grad_norm": 2.232624053955078, + "learning_rate": 2.1799507861738788e-08, + "logits/chosen": -0.697775661945343, + "logits/rejected": -0.7254015803337097, + "logps/chosen": -1.8258415460586548, + "logps/rejected": -2.089477777481079, + "loss": 1.3136, + "rewards/accuracies": 0.578125, + "rewards/chosen": -3.6516830921173096, + "rewards/margins": 0.5272722244262695, + "rewards/rejected": -4.178955554962158, + "step": 432 + }, + { + "epoch": 0.9154938883382887, + "grad_norm": 5.815128326416016, + "learning_rate": 2.073065902741472e-08, + "logits/chosen": -0.5873744487762451, + "logits/rejected": -0.5638723969459534, + "logps/chosen": -1.9891620874404907, + "logps/rejected": -2.4962096214294434, + "loss": 1.1379, + "rewards/accuracies": 0.6953125, + "rewards/chosen": -3.9783241748809814, + "rewards/margins": 1.0140951871871948, + "rewards/rejected": -4.992419242858887, + "step": 433 + }, + { + "epoch": 0.917608192930294, + "grad_norm": 5.057411193847656, + "learning_rate": 1.9688121698277993e-08, + "logits/chosen": -0.607324481010437, + "logits/rejected": -0.5964059829711914, + "logps/chosen": -1.8643240928649902, + "logps/rejected": -2.2751855850219727, + "loss": 1.2388, + "rewards/accuracies": 0.625, + "rewards/chosen": -3.7286481857299805, + "rewards/margins": 0.8217229843139648, + "rewards/rejected": -4.550371170043945, + "step": 434 + }, + { + "epoch": 0.9197224975222993, + "grad_norm": 2.25390362739563, + "learning_rate": 1.8671953108797823e-08, + "logits/chosen": -0.6268022656440735, + "logits/rejected": -0.6332954168319702, + "logps/chosen": -1.945924997329712, + "logps/rejected": -2.330981731414795, + "loss": 1.1455, + "rewards/accuracies": 0.71875, + "rewards/chosen": -3.891849994659424, + "rewards/margins": 0.7701136469841003, + "rewards/rejected": -4.66196346282959, + "step": 435 + }, + { + "epoch": 0.9218368021143046, + "grad_norm": 3.9572856426239014, + "learning_rate": 1.7682209045820684e-08, + "logits/chosen": -0.6522207856178284, + "logits/rejected": -0.6930267810821533, + "logps/chosen": -1.8152984380722046, + "logps/rejected": -2.0800223350524902, + "loss": 1.2978, + "rewards/accuracies": 0.6875, + "rewards/chosen": -3.630596876144409, + "rewards/margins": 0.5294479727745056, + "rewards/rejected": -4.1600446701049805, + "step": 436 + }, + { + "epoch": 0.9239511067063099, + "grad_norm": 1.733438491821289, + "learning_rate": 1.671894384550743e-08, + "logits/chosen": -0.5977643728256226, + "logits/rejected": -0.5842040777206421, + "logps/chosen": -1.8794972896575928, + "logps/rejected": -2.413329601287842, + "loss": 1.0233, + "rewards/accuracies": 0.71875, + "rewards/chosen": -3.7589945793151855, + "rewards/margins": 1.0676649808883667, + "rewards/rejected": -4.826659202575684, + "step": 437 + }, + { + "epoch": 0.9260654112983152, + "grad_norm": 2.8760743141174316, + "learning_rate": 1.5782210390350713e-08, + "logits/chosen": -0.5813508033752441, + "logits/rejected": -0.5602753758430481, + "logps/chosen": -1.7892794609069824, + "logps/rejected": -2.32309627532959, + "loss": 1.0836, + "rewards/accuracies": 0.71875, + "rewards/chosen": -3.578558921813965, + "rewards/margins": 1.0676335096359253, + "rewards/rejected": -4.64619255065918, + "step": 438 + }, + { + "epoch": 0.9281797158903204, + "grad_norm": 5.760490894317627, + "learning_rate": 1.4872060106271179e-08, + "logits/chosen": -0.5673117637634277, + "logits/rejected": -0.5580011606216431, + "logps/chosen": -1.943117618560791, + "logps/rejected": -2.4581894874572754, + "loss": 1.1229, + "rewards/accuracies": 0.71875, + "rewards/chosen": -3.886235237121582, + "rewards/margins": 1.030144453048706, + "rewards/rejected": -4.916378974914551, + "step": 439 + }, + { + "epoch": 0.9302940204823258, + "grad_norm": 5.213393211364746, + "learning_rate": 1.3988542959794625e-08, + "logits/chosen": -0.5715171098709106, + "logits/rejected": -0.5791775584220886, + "logps/chosen": -1.961305022239685, + "logps/rejected": -2.4485957622528076, + "loss": 1.0877, + "rewards/accuracies": 0.7265625, + "rewards/chosen": -3.92261004447937, + "rewards/margins": 0.9745810627937317, + "rewards/rejected": -4.897191524505615, + "step": 440 + }, + { + "epoch": 0.932408325074331, + "grad_norm": 2.670029878616333, + "learning_rate": 1.3131707455309004e-08, + "logits/chosen": -0.6612206101417542, + "logits/rejected": -0.569149374961853, + "logps/chosen": -1.9947882890701294, + "logps/rejected": -2.41544771194458, + "loss": 1.2501, + "rewards/accuracies": 0.65625, + "rewards/chosen": -3.989576578140259, + "rewards/margins": 0.8413184881210327, + "rewards/rejected": -4.83089542388916, + "step": 441 + }, + { + "epoch": 0.9345226296663363, + "grad_norm": 2.0773093700408936, + "learning_rate": 1.230160063240121e-08, + "logits/chosen": -0.5475001335144043, + "logits/rejected": -0.6024526953697205, + "logps/chosen": -1.9972546100616455, + "logps/rejected": -2.2212231159210205, + "loss": 1.2857, + "rewards/accuracies": 0.6796875, + "rewards/chosen": -3.994509220123291, + "rewards/margins": 0.4479368031024933, + "rewards/rejected": -4.442446231842041, + "step": 442 + }, + { + "epoch": 0.9366369342583416, + "grad_norm": 2.6185569763183594, + "learning_rate": 1.1498268063274697e-08, + "logits/chosen": -0.6600778102874756, + "logits/rejected": -0.6794160604476929, + "logps/chosen": -1.7303975820541382, + "logps/rejected": -2.0589568614959717, + "loss": 1.183, + "rewards/accuracies": 0.6875, + "rewards/chosen": -3.4607951641082764, + "rewards/margins": 0.6571190357208252, + "rewards/rejected": -4.117913722991943, + "step": 443 + }, + { + "epoch": 0.9387512388503468, + "grad_norm": 2.7385923862457275, + "learning_rate": 1.0721753850247984e-08, + "logits/chosen": -0.6136504411697388, + "logits/rejected": -0.5926402807235718, + "logps/chosen": -1.9593303203582764, + "logps/rejected": -2.446382999420166, + "loss": 1.161, + "rewards/accuracies": 0.640625, + "rewards/chosen": -3.9186606407165527, + "rewards/margins": 0.9741055965423584, + "rewards/rejected": -4.892765998840332, + "step": 444 + }, + { + "epoch": 0.9408655434423522, + "grad_norm": 2.006077527999878, + "learning_rate": 9.972100623333035e-09, + "logits/chosen": -0.5911227464675903, + "logits/rejected": -0.5988056063652039, + "logps/chosen": -1.9767932891845703, + "logps/rejected": -2.307847499847412, + "loss": 1.2698, + "rewards/accuracies": 0.6953125, + "rewards/chosen": -3.9535865783691406, + "rewards/margins": 0.6621084809303284, + "rewards/rejected": -4.615694999694824, + "step": 445 + }, + { + "epoch": 0.9429798480343574, + "grad_norm": 3.775676965713501, + "learning_rate": 9.249349537894968e-09, + "logits/chosen": -0.5951496958732605, + "logits/rejected": -0.5602840185165405, + "logps/chosen": -2.01466965675354, + "logps/rejected": -2.404120922088623, + "loss": 1.3551, + "rewards/accuracies": 0.6328125, + "rewards/chosen": -4.02933931350708, + "rewards/margins": 0.7789022922515869, + "rewards/rejected": -4.808241844177246, + "step": 446 + }, + { + "epoch": 0.9450941526263628, + "grad_norm": 10.657898902893066, + "learning_rate": 8.553540272392967e-09, + "logits/chosen": -0.616013765335083, + "logits/rejected": -0.6068493127822876, + "logps/chosen": -1.9523563385009766, + "logps/rejected": -2.3371798992156982, + "loss": 1.2264, + "rewards/accuracies": 0.640625, + "rewards/chosen": -3.904712677001953, + "rewards/margins": 0.7696471810340881, + "rewards/rejected": -4.6743597984313965, + "step": 447 + }, + { + "epoch": 0.947208457218368, + "grad_norm": 5.239955902099609, + "learning_rate": 7.884711026201584e-09, + "logits/chosen": -0.5559091567993164, + "logits/rejected": -0.5499454140663147, + "logps/chosen": -1.9888339042663574, + "logps/rejected": -2.5645201206207275, + "loss": 1.1615, + "rewards/accuracies": 0.703125, + "rewards/chosen": -3.977667808532715, + "rewards/margins": 1.1513725519180298, + "rewards/rejected": -5.129040241241455, + "step": 448 + }, + { + "epoch": 0.9493227618103733, + "grad_norm": 4.970836162567139, + "learning_rate": 7.242898517513863e-09, + "logits/chosen": -0.6270098686218262, + "logits/rejected": -0.5990616083145142, + "logps/chosen": -2.0393564701080322, + "logps/rejected": -2.6450533866882324, + "loss": 1.0316, + "rewards/accuracies": 0.7421875, + "rewards/chosen": -4.0787129402160645, + "rewards/margins": 1.2113933563232422, + "rewards/rejected": -5.290106773376465, + "step": 449 + }, + { + "epoch": 0.9514370664023786, + "grad_norm": 4.401031494140625, + "learning_rate": 6.62813798132561e-09, + "logits/chosen": -0.6103833913803101, + "logits/rejected": -0.6355498433113098, + "logps/chosen": -1.9900306463241577, + "logps/rejected": -2.4799742698669434, + "loss": 1.1272, + "rewards/accuracies": 0.734375, + "rewards/chosen": -3.9800612926483154, + "rewards/margins": 0.979887843132019, + "rewards/rejected": -4.959948539733887, + "step": 450 + }, + { + "epoch": 0.9535513709943839, + "grad_norm": 5.162088871002197, + "learning_rate": 6.040463167500509e-09, + "logits/chosen": -0.6351377367973328, + "logits/rejected": -0.6445170044898987, + "logps/chosen": -2.017266035079956, + "logps/rejected": -2.4103317260742188, + "loss": 1.2591, + "rewards/accuracies": 0.6953125, + "rewards/chosen": -4.034532070159912, + "rewards/margins": 0.7861310243606567, + "rewards/rejected": -4.8206634521484375, + "step": 451 + }, + { + "epoch": 0.9556656755863892, + "grad_norm": 3.158773422241211, + "learning_rate": 5.4799063389179834e-09, + "logits/chosen": -0.6216992139816284, + "logits/rejected": -0.6317836046218872, + "logps/chosen": -1.9916179180145264, + "logps/rejected": -2.476783275604248, + "loss": 1.192, + "rewards/accuracies": 0.671875, + "rewards/chosen": -3.9832358360290527, + "rewards/margins": 0.970331072807312, + "rewards/rejected": -4.953566551208496, + "step": 452 + }, + { + "epoch": 0.9577799801783945, + "grad_norm": 4.7540435791015625, + "learning_rate": 4.946498269701616e-09, + "logits/chosen": -0.652457594871521, + "logits/rejected": -0.6148388385772705, + "logps/chosen": -2.0300891399383545, + "logps/rejected": -2.5610132217407227, + "loss": 1.0769, + "rewards/accuracies": 0.734375, + "rewards/chosen": -4.060178279876709, + "rewards/margins": 1.061848759651184, + "rewards/rejected": -5.122026443481445, + "step": 453 + }, + { + "epoch": 0.9598942847703997, + "grad_norm": 4.686556339263916, + "learning_rate": 4.440268243529666e-09, + "logits/chosen": -0.5588012337684631, + "logits/rejected": -0.5526341199874878, + "logps/chosen": -1.8666988611221313, + "logps/rejected": -2.3390815258026123, + "loss": 1.1768, + "rewards/accuracies": 0.6796875, + "rewards/chosen": -3.7333977222442627, + "rewards/margins": 0.9447645545005798, + "rewards/rejected": -4.678163051605225, + "step": 454 + }, + { + "epoch": 0.9620085893624051, + "grad_norm": 2.740269422531128, + "learning_rate": 3.961244052027413e-09, + "logits/chosen": -0.6438521146774292, + "logits/rejected": -0.6682748198509216, + "logps/chosen": -2.0076475143432617, + "logps/rejected": -2.388810396194458, + "loss": 1.2689, + "rewards/accuracies": 0.640625, + "rewards/chosen": -4.015295028686523, + "rewards/margins": 0.7623259425163269, + "rewards/rejected": -4.777620792388916, + "step": 455 + }, + { + "epoch": 0.9641228939544103, + "grad_norm": 2.9197144508361816, + "learning_rate": 3.509451993241541e-09, + "logits/chosen": -0.5822494029998779, + "logits/rejected": -0.5853508114814758, + "logps/chosen": -1.8848122358322144, + "logps/rejected": -2.4192898273468018, + "loss": 1.0924, + "rewards/accuracies": 0.71875, + "rewards/chosen": -3.7696244716644287, + "rewards/margins": 1.0689555406570435, + "rewards/rejected": -4.8385796546936035, + "step": 456 + }, + { + "epoch": 0.9662371985464155, + "grad_norm": 4.501737117767334, + "learning_rate": 3.084916870196297e-09, + "logits/chosen": -0.5652188658714294, + "logits/rejected": -0.5740686655044556, + "logps/chosen": -1.9216543436050415, + "logps/rejected": -2.23102068901062, + "loss": 1.2907, + "rewards/accuracies": 0.6328125, + "rewards/chosen": -3.843308687210083, + "rewards/margins": 0.618732750415802, + "rewards/rejected": -4.46204137802124, + "step": 457 + }, + { + "epoch": 0.9683515031384209, + "grad_norm": 3.512376070022583, + "learning_rate": 2.687661989531964e-09, + "logits/chosen": -0.6515664458274841, + "logits/rejected": -0.6550417542457581, + "logps/chosen": -1.9334843158721924, + "logps/rejected": -2.2688543796539307, + "loss": 1.2578, + "rewards/accuracies": 0.578125, + "rewards/chosen": -3.8669686317443848, + "rewards/margins": 0.67074054479599, + "rewards/rejected": -4.537708759307861, + "step": 458 + }, + { + "epoch": 0.9704658077304261, + "grad_norm": 2.165844678878784, + "learning_rate": 2.3177091602251675e-09, + "logits/chosen": -0.6218724250793457, + "logits/rejected": -0.5920112729072571, + "logps/chosen": -1.8584281206130981, + "logps/rejected": -2.366225242614746, + "loss": 1.1553, + "rewards/accuracies": 0.6875, + "rewards/chosen": -3.7168562412261963, + "rewards/margins": 1.0155941247940063, + "rewards/rejected": -4.732450485229492, + "step": 459 + }, + { + "epoch": 0.9725801123224315, + "grad_norm": 1.7227884531021118, + "learning_rate": 1.975078692391552e-09, + "logits/chosen": -0.5791985988616943, + "logits/rejected": -0.5785022974014282, + "logps/chosen": -1.8981022834777832, + "logps/rejected": -2.3716633319854736, + "loss": 1.1642, + "rewards/accuracies": 0.7265625, + "rewards/chosen": -3.7962045669555664, + "rewards/margins": 0.9471220970153809, + "rewards/rejected": -4.743326663970947, + "step": 460 + }, + { + "epoch": 0.9746944169144367, + "grad_norm": 3.150090217590332, + "learning_rate": 1.659789396171063e-09, + "logits/chosen": -0.6548072695732117, + "logits/rejected": -0.6290433406829834, + "logps/chosen": -2.0168204307556152, + "logps/rejected": -2.520479679107666, + "loss": 1.0736, + "rewards/accuracies": 0.765625, + "rewards/chosen": -4.0336408615112305, + "rewards/margins": 1.0073186159133911, + "rewards/rejected": -5.040959358215332, + "step": 461 + }, + { + "epoch": 0.976808721506442, + "grad_norm": 1.256157636642456, + "learning_rate": 1.37185858069494e-09, + "logits/chosen": -0.7094852328300476, + "logits/rejected": -0.7226460576057434, + "logps/chosen": -1.8896048069000244, + "logps/rejected": -2.4871973991394043, + "loss": 1.0536, + "rewards/accuracies": 0.6640625, + "rewards/chosen": -3.779209613800049, + "rewards/margins": 1.1951854228973389, + "rewards/rejected": -4.974394798278809, + "step": 462 + }, + { + "epoch": 0.9789230260984473, + "grad_norm": 2.8358895778656006, + "learning_rate": 1.1113020531357541e-09, + "logits/chosen": -0.6778469085693359, + "logits/rejected": -0.6957201957702637, + "logps/chosen": -2.0275380611419678, + "logps/rejected": -2.470618963241577, + "loss": 1.1801, + "rewards/accuracies": 0.6640625, + "rewards/chosen": -4.0550761222839355, + "rewards/margins": 0.886161208152771, + "rewards/rejected": -4.941237926483154, + "step": 463 + }, + { + "epoch": 0.9810373306904526, + "grad_norm": 2.8881914615631104, + "learning_rate": 8.781341178393242e-10, + "logits/chosen": -0.5639821887016296, + "logits/rejected": -0.5891467928886414, + "logps/chosen": -2.0047199726104736, + "logps/rejected": -2.522782802581787, + "loss": 1.1948, + "rewards/accuracies": 0.640625, + "rewards/chosen": -4.009439945220947, + "rewards/margins": 1.0361257791519165, + "rewards/rejected": -5.045565605163574, + "step": 464 + }, + { + "epoch": 0.9831516352824579, + "grad_norm": 3.421194314956665, + "learning_rate": 6.723675755396229e-10, + "logits/chosen": -0.540326714515686, + "logits/rejected": -0.5159227252006531, + "logps/chosen": -1.88228178024292, + "logps/rejected": -2.2003138065338135, + "loss": 1.2191, + "rewards/accuracies": 0.59375, + "rewards/chosen": -3.76456356048584, + "rewards/margins": 0.6360642313957214, + "rewards/rejected": -4.400627613067627, + "step": 465 + }, + { + "epoch": 0.9852659398744632, + "grad_norm": 4.243066310882568, + "learning_rate": 4.940137226560615e-10, + "logits/chosen": -0.6175463795661926, + "logits/rejected": -0.6400432586669922, + "logps/chosen": -1.9547748565673828, + "logps/rejected": -2.4598965644836426, + "loss": 1.2589, + "rewards/accuracies": 0.640625, + "rewards/chosen": -3.9095497131347656, + "rewards/margins": 1.0102434158325195, + "rewards/rejected": -4.919793128967285, + "step": 466 + }, + { + "epoch": 0.9873802444664684, + "grad_norm": 3.3425028324127197, + "learning_rate": 3.430823506730962e-10, + "logits/chosen": -0.5236034393310547, + "logits/rejected": -0.48699086904525757, + "logps/chosen": -2.167372465133667, + "logps/rejected": -2.687620162963867, + "loss": 1.2024, + "rewards/accuracies": 0.6796875, + "rewards/chosen": -4.334744930267334, + "rewards/margins": 1.0404952764511108, + "rewards/rejected": -5.375240325927734, + "step": 467 + }, + { + "epoch": 0.9894945490584738, + "grad_norm": 3.1803112030029297, + "learning_rate": 2.1958174560282594e-10, + "logits/chosen": -0.6515716910362244, + "logits/rejected": -0.6526726484298706, + "logps/chosen": -2.0350496768951416, + "logps/rejected": -2.4857177734375, + "loss": 1.1524, + "rewards/accuracies": 0.6484375, + "rewards/chosen": -4.070099353790283, + "rewards/margins": 0.9013361930847168, + "rewards/rejected": -4.971435546875, + "step": 468 + }, + { + "epoch": 0.991608853650479, + "grad_norm": 2.8402769565582275, + "learning_rate": 1.2351868753018858e-10, + "logits/chosen": -0.5555111765861511, + "logits/rejected": -0.5084383487701416, + "logps/chosen": -1.9741497039794922, + "logps/rejected": -2.5360653400421143, + "loss": 1.0956, + "rewards/accuracies": 0.703125, + "rewards/chosen": -3.9482994079589844, + "rewards/margins": 1.1238315105438232, + "rewards/rejected": -5.0721306800842285, + "step": 469 + }, + { + "epoch": 0.9937231582424844, + "grad_norm": 14.110418319702148, + "learning_rate": 5.4898450240536964e-11, + "logits/chosen": -0.6210866570472717, + "logits/rejected": -0.614806056022644, + "logps/chosen": -2.0763094425201416, + "logps/rejected": -2.5026116371154785, + "loss": 1.2184, + "rewards/accuracies": 0.7265625, + "rewards/chosen": -4.152618885040283, + "rewards/margins": 0.8526046276092529, + "rewards/rejected": -5.005223274230957, + "step": 470 + }, + { + "epoch": 0.9958374628344896, + "grad_norm": 2.8393566608428955, + "learning_rate": 1.3724800930314805e-11, + "logits/chosen": -0.5895847678184509, + "logits/rejected": -0.6269129514694214, + "logps/chosen": -1.8787530660629272, + "logps/rejected": -2.4467647075653076, + "loss": 1.0714, + "rewards/accuracies": 0.7265625, + "rewards/chosen": -3.7575061321258545, + "rewards/margins": 1.1360235214233398, + "rewards/rejected": -4.893529415130615, + "step": 471 + }, + { + "epoch": 0.9979517674264948, + "grad_norm": 3.9959075450897217, + "learning_rate": 0.0, + "logits/chosen": -0.6461910009384155, + "logits/rejected": -0.6503991484642029, + "logps/chosen": -1.798724889755249, + "logps/rejected": -2.3589823246002197, + "loss": 1.0133, + "rewards/accuracies": 0.7421875, + "rewards/chosen": -3.597449779510498, + "rewards/margins": 1.1205153465270996, + "rewards/rejected": -4.7179646492004395, + "step": 472 + }, + { + "epoch": 0.9979517674264948, + "step": 472, + "total_flos": 0.0, + "train_loss": 1.280224425307775, + "train_runtime": 38087.5267, + "train_samples_per_second": 1.589, + "train_steps_per_second": 0.012 + } + ], + "logging_steps": 1, + "max_steps": 472, + "num_input_tokens_seen": 0, + "num_train_epochs": 1, + "save_steps": 64, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": true + }, + "attributes": {} + } + }, + "total_flos": 0.0, + "train_batch_size": 2, + "trial_name": null, + "trial_params": null +}